bigdl-core-cpp 2.7.0b20250629__py3-none-win_amd64.whl → 2.7.0b20250701__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl/cpp/convert_hf_to_gguf.py +1987 -558
- bigdl/cpp/convert_hf_to_gguf_update.py +131 -67
- bigdl/cpp/convert_lora_to_gguf.py +3 -3
- bigdl/cpp/gguf-py/gguf/constants.py +546 -16
- bigdl/cpp/gguf-py/gguf/gguf_reader.py +57 -6
- bigdl/cpp/gguf-py/gguf/gguf_writer.py +119 -7
- bigdl/cpp/gguf-py/gguf/lazy.py +10 -0
- bigdl/cpp/gguf-py/gguf/metadata.py +28 -8
- bigdl/cpp/gguf-py/gguf/tensor_mapping.py +461 -48
- bigdl/cpp/gguf-py/gguf/utility.py +195 -0
- bigdl/cpp/gguf-py/gguf/vocab.py +6 -1
- bigdl/cpp/libs/llama_cpp/ggml-base.dll +0 -0
- bigdl/cpp/libs/llama_cpp/ggml-cpu.dll +0 -0
- bigdl/cpp/libs/llama_cpp/ggml-sycl.dll +0 -0
- bigdl/cpp/libs/llama_cpp/ggml.dll +0 -0
- bigdl/cpp/libs/llama_cpp/llama-batched.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-bench.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-cli.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-embedding.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-gemma3-cli.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-gguf.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-llava-cli.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-lookup.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-ls-sycl-device.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-minicpmv-cli.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-perplexity.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-quantize.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-server.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-simple.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-speculative.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-tokenize.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama.dll +0 -0
- bigdl/cpp/libs/ollama/ggml-base.dll +0 -0
- bigdl/cpp/libs/ollama/ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ollama/ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ollama/ggml.dll +0 -0
- bigdl/cpp/libs/ollama/llama.dll +0 -0
- bigdl/cpp/libs/ollama/llava_shared.dll +0 -0
- bigdl/cpp/libs/ollama/mtmd_shared.dll +0 -0
- bigdl/cpp/libs/ollama/ollama-lib.exe +0 -0
- bigdl/cpp/libs/ollama/ollama.exe +0 -0
- {bigdl_core_cpp-2.7.0b20250629.data → bigdl_core_cpp-2.7.0b20250701.data}/scripts/init-ollama.bat +1 -5
- {bigdl_core_cpp-2.7.0b20250629.dist-info → bigdl_core_cpp-2.7.0b20250701.dist-info}/METADATA +1 -1
- bigdl_core_cpp-2.7.0b20250701.dist-info/RECORD +56 -0
- bigdl/cpp/libs/llama_cpp/llava_shared.dll +0 -0
- bigdl_core_cpp-2.7.0b20250629.dist-info/RECORD +0 -56
- {bigdl_core_cpp-2.7.0b20250629.data → bigdl_core_cpp-2.7.0b20250701.data}/scripts/init-llama-cpp.bat +0 -0
- {bigdl_core_cpp-2.7.0b20250629.data → bigdl_core_cpp-2.7.0b20250701.data}/scripts/init-llama-cpp.ps1 +0 -0
- {bigdl_core_cpp-2.7.0b20250629.dist-info → bigdl_core_cpp-2.7.0b20250701.dist-info}/WHEEL +0 -0
- {bigdl_core_cpp-2.7.0b20250629.dist-info → bigdl_core_cpp-2.7.0b20250701.dist-info}/top_level.txt +0 -0
bigdl/cpp/gguf-py/gguf/gguf_reader.py CHANGED
@@ -6,6 +6,7 @@ from __future__ import annotations
 
 import logging
 import os
+import sys
 from collections import OrderedDict
 from typing import Any, Literal, NamedTuple, TypeVar, Union
 
@@ -15,7 +16,6 @@ import numpy.typing as npt
 from .quants import quant_shape_to_byte_shape
 
 if __name__ == "__main__":
-    import sys
     from pathlib import Path
 
     # Allow running file in package as a script.
@@ -28,6 +28,7 @@ from gguf.constants import (
     GGUF_VERSION,
     GGMLQuantizationType,
     GGUFValueType,
+    GGUFEndian,
 )
 
 logger = logging.getLogger(__name__)
@@ -53,6 +54,48 @@ class ReaderField(NamedTuple):
 
     types: list[GGUFValueType] = []
 
+    def contents(self, index_or_slice: int | slice = slice(None)) -> Any:
+        if self.types:
+            to_string = lambda x: str(x.tobytes(), encoding='utf-8') # noqa: E731
+            main_type = self.types[0]
+
+            if main_type == GGUFValueType.ARRAY:
+                sub_type = self.types[-1]
+
+                if sub_type == GGUFValueType.STRING:
+                    indices = self.data[index_or_slice]
+
+                    if isinstance(index_or_slice, int):
+                        return to_string(self.parts[indices]) # type: ignore
+                    else:
+                        return [to_string(self.parts[idx]) for idx in indices] # type: ignore
+                else:
+                    # FIXME: When/if _get_field_parts() support multi-dimensional arrays, this must do so too
+
+                    # Check if it's unsafe to perform slice optimization on data
+                    # if any(True for idx in self.data if len(self.parts[idx]) != 1):
+                    #     optim_slice = slice(None)
+                    # else:
+                    #     optim_slice = index_or_slice
+                    #     index_or_slice = slice(None)
+
+                    # if isinstance(optim_slice, int):
+                    #     return self.parts[self.data[optim_slice]].tolist()[0]
+                    # else:
+                    #     return [pv for idx in self.data[optim_slice] for pv in self.parts[idx].tolist()][index_or_slice]
+
+                    if isinstance(index_or_slice, int):
+                        return self.parts[self.data[index_or_slice]].tolist()[0]
+                    else:
+                        return [pv for idx in self.data[index_or_slice] for pv in self.parts[idx].tolist()]
+
+            if main_type == GGUFValueType.STRING:
+                return to_string(self.parts[-1])
+            else:
+                return self.parts[-1].tolist()[0]
+
+        return None
+
 
 class ReaderTensor(NamedTuple):
     name: str
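The new ReaderField.contents() helper replaces manual indexing through field.parts and field.data. A minimal usage sketch (the file path is hypothetical, and the keys shown only resolve if present in that file):

from gguf.gguf_reader import GGUFReader

reader = GGUFReader("model.gguf")  # hypothetical local GGUF file
arch = reader.fields.get("general.architecture")
if arch is not None:
    print(arch.contents())               # full value; a str for STRING fields
tokens = reader.fields.get("tokenizer.ggml.tokens")
if tokens is not None:
    print(tokens.contents(slice(0, 5)))  # first five entries of a string array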
@@ -101,10 +144,19 @@ class GGUFReader:
             # If we get 0 here that means it's (probably) a GGUF file created for
             # the opposite byte order of the machine this script is running on.
             self.byte_order = 'S'
-            temp_version = temp_version.newbyteorder(self.byte_order)
+            temp_version = temp_version.view(temp_version.dtype.newbyteorder(self.byte_order))
         version = temp_version[0]
         if version not in READER_SUPPORTED_VERSIONS:
             raise ValueError(f'Sorry, file appears to be version {version} which we cannot handle')
+        if sys.byteorder == "little":
+            # Host is little endian
+            host_endian = GGUFEndian.LITTLE
+            swapped_endian = GGUFEndian.BIG
+        else:
+            # Sorry PDP or other weird systems that don't use BE or LE.
+            host_endian = GGUFEndian.BIG
+            swapped_endian = GGUFEndian.LITTLE
+        self.endianess = swapped_endian if self.byte_order == "S" else host_endian
         self.fields: OrderedDict[str, ReaderField] = OrderedDict()
         self.tensors: list[ReaderTensor] = []
         offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32]))
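Context for the temp_version change: ndarray.newbyteorder() was removed in NumPy 2.0, so the code switches to the dtype-level form, which behaves the same on NumPy 1.x and 2.x. A minimal sketch of the pattern:

import numpy as np

arr = np.arange(4, dtype=np.uint32)
# same underlying bytes, reinterpreted with the swapped ('S') byte order
swapped = arr.view(arr.dtype.newbyteorder('S'))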
@@ -146,9 +198,7 @@ class GGUFReader:
         itemsize = int(np.empty([], dtype = dtype).itemsize)
         end_offs = offset + itemsize * count
         arr = self.data[offset:end_offs].view(dtype=dtype)[:count]
-        if override_order is None:
-            return arr
-        return arr.view(arr.dtype.newbyteorder(override_order))
+        return arr.view(arr.dtype.newbyteorder(self.byte_order if override_order is None else override_order))
 
     def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
         if field.name in self.fields:
@@ -190,6 +240,7 @@ class GGUFReader:
             offs += int(alen.nbytes)
             aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
             data_idxs: list[int] = []
+            # FIXME: Handle multi-dimensional arrays properly instead of flattening
             for idx in range(alen[0]):
                 curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0])
                 if idx == 0:
@@ -200,7 +251,7 @@ class GGUFReader:
                 offs += curr_size
             return offs - orig_offs, aparts, data_idxs, types
         # We can't deal with this one.
-        raise ValueError('Unknown/unhandled field type {gtype}')
+        raise ValueError(f'Unknown/unhandled field type {gtype}')
 
     def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
         offs = orig_offs
bigdl/cpp/gguf-py/gguf/gguf_writer.py CHANGED
@@ -49,6 +49,7 @@ class TensorInfo:
 class GGUFValue:
     value: Any
     type: GGUFValueType
+    sub_type: GGUFValueType | None = None
 
 
 class WriterState(Enum):
@@ -238,7 +239,7 @@ class GGUFWriter:
 
         for key, val in kv_data.items():
             kv_bytes += self._pack_val(key, GGUFValueType.STRING, add_vtype=False)
-            kv_bytes += self._pack_val(val.value, val.type, add_vtype=True)
+            kv_bytes += self._pack_val(val.value, val.type, add_vtype=True, sub_type=val.sub_type)
 
         fout.write(kv_bytes)
 
@@ -268,11 +269,11 @@ class GGUFWriter:
         fout.flush()
         self.state = WriterState.TI_DATA
 
-    def add_key_value(self, key: str, val: Any, vtype: GGUFValueType) -> None:
+    def add_key_value(self, key: str, val: Any, vtype: GGUFValueType, sub_type: GGUFValueType | None = None) -> None:
         if any(key in kv_data for kv_data in self.kv_data):
-            raise ValueError(f'Duplicated key name {key!r}')
+            logger.warning(f'Duplicated key name {key!r}, overwriting it with new value {val!r} of type {vtype.name}')
 
-        self.kv_data[0][key] = GGUFValue(value=val, type=vtype)
+        self.kv_data[0][key] = GGUFValue(value=val, type=vtype, sub_type=sub_type)
 
     def add_uint8(self, key: str, val: int) -> None:
         self.add_key_value(key,val, GGUFValueType.UINT8)
|
@@ -689,6 +690,12 @@ class GGUFWriter:
|
|
689
690
|
def add_value_length(self, length: int) -> None:
|
690
691
|
self.add_uint32(Keys.Attention.VALUE_LENGTH.format(arch=self.arch), length)
|
691
692
|
|
693
|
+
def add_key_length_mla(self, length: int) -> None:
|
694
|
+
self.add_uint32(Keys.Attention.KEY_LENGTH_MLA.format(arch=self.arch), length)
|
695
|
+
|
696
|
+
def add_value_length_mla(self, length: int) -> None:
|
697
|
+
self.add_uint32(Keys.Attention.VALUE_LENGTH_MLA.format(arch=self.arch), length)
|
698
|
+
|
692
699
|
def add_max_alibi_bias(self, bias: float) -> None:
|
693
700
|
self.add_float32(Keys.Attention.MAX_ALIBI_BIAS.format(arch=self.arch), bias)
|
694
701
|
|
@@ -722,6 +729,9 @@ class GGUFWriter:
     def add_expert_gating_func(self, value: ExpertGatingFuncType) -> None:
         self.add_uint32(Keys.LLM.EXPERT_GATING_FUNC.format(arch=self.arch), value.value)
 
+    def add_moe_every_n_layers(self, value: int) -> None:
+        self.add_uint32(Keys.LLM.MOE_EVERY_N_LAYERS.format(arch=self.arch), value)
+
     def add_swin_norm(self, value: bool) -> None:
         self.add_bool(Keys.LLM.SWIN_NORM.format(arch=self.arch), value)
 
@@ -746,6 +756,9 @@ class GGUFWriter:
     def add_token_shift_count(self, count: int) -> None:
         self.add_uint32(Keys.LLM.TOKEN_SHIFT_COUNT.format(arch=self.arch), count)
 
+    def add_interleave_moe_layer_step(self, value: int) -> None:
+        self.add_uint32(Keys.LLM.INTERLEAVE_MOE_LAYER_STEP.format(arch=self.arch), value)
+
     def add_layer_norm_eps(self, value: float) -> None:
         self.add_float32(Keys.Attention.LAYERNORM_EPS.format(arch=self.arch), value)
 
@@ -767,6 +780,18 @@ class GGUFWriter:
     def add_kv_lora_rank(self, length: int) -> None:
         self.add_uint32(Keys.Attention.KV_LORA_RANK.format(arch=self.arch), length)
 
+    def add_decay_lora_rank(self, length: int) -> None:
+        self.add_uint32(Keys.Attention.DECAY_LORA_RANK.format(arch=self.arch), length)
+
+    def add_iclr_lora_rank(self, length: int) -> None:
+        self.add_uint32(Keys.Attention.ICLR_LORA_RANK.format(arch=self.arch), length)
+
+    def add_value_residual_mix_lora_rank(self, length: int) -> None:
+        self.add_uint32(Keys.Attention.VALUE_RESIDUAL_MIX_LORA_RANK.format(arch=self.arch), length)
+
+    def add_gate_lora_rank(self, length: int) -> None:
+        self.add_uint32(Keys.Attention.GATE_LORA_RANK.format(arch=self.arch), length)
+
     def add_relative_attn_buckets_count(self, value: int) -> None:
         self.add_uint32(Keys.Attention.REL_BUCKETS_COUNT.format(arch=self.arch), value)
 
@@ -872,7 +897,7 @@ class GGUFWriter:
     def add_remove_extra_whitespaces(self, value: bool) -> None:
         self.add_bool(Keys.Tokenizer.REMOVE_EXTRA_WS, value)
 
-    def add_precompiled_charsmap(self, charsmap: Sequence[bytes]) -> None:
+    def add_precompiled_charsmap(self, charsmap: bytes) -> None:
         self.add_array(Keys.Tokenizer.PRECOMPILED_CHARSMAP, charsmap)
 
     def add_chat_template(self, value: str | Sequence[Mapping[str, str]]) -> None:
@@ -910,13 +935,98 @@ class GGUFWriter:
     def add_eom_token_id(self, id: int) -> None:
         self.add_uint32(Keys.Tokenizer.EOM_ID, id)
 
+    def add_classifier_output_labels(self, labels: Sequence[str]) -> None:
+        self.add_array(Keys.Classifier.OUTPUT_LABELS.format(arch=self.arch), labels)
+
+    # for vision models
+
+    def add_clip_has_vision_encoder(self, value: bool) -> None:
+        self.add_bool(Keys.Clip.HAS_VISION_ENCODER, value)
+
+    def add_clip_has_audio_encoder(self, value: bool) -> None:
+        self.add_bool(Keys.Clip.HAS_AUDIO_ENCODER, value)
+
+    def add_clip_projector_type(self, value: str) -> None:
+        self.add_string(Keys.Clip.PROJECTOR_TYPE, value)
+
+    def add_vision_projection_dim(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.PROJECTION_DIM, value)
+
+    def add_vision_patch_size(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.PATCH_SIZE, value)
+
+    def add_vision_embedding_length(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.EMBEDDING_LENGTH, value)
+
+    def add_vision_feed_forward_length(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.FEED_FORWARD_LENGTH, value)
+
+    def add_vision_block_count(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.BLOCK_COUNT, value)
+
+    def add_vision_head_count(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.Attention.HEAD_COUNT, value)
+
+    def add_vision_attention_layernorm_eps(self, value: float) -> None:
+        self.add_float32(Keys.ClipVision.Attention.LAYERNORM_EPS, value)
+
+    def add_vision_image_size(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.IMAGE_SIZE, value)
+
+    def add_vision_image_mean(self, values: Sequence[float]) -> None:
+        self.add_array(Keys.ClipVision.IMAGE_MEAN, values)
+
+    def add_vision_image_std(self, values: Sequence[float]) -> None:
+        self.add_array(Keys.ClipVision.IMAGE_STD, values)
+
+    def add_vision_spatial_merge_size(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.SPATIAL_MERGE_SIZE, value)
+
+    def add_vision_use_gelu(self, value: bool) -> None:
+        self.add_bool(Keys.ClipVision.USE_GELU, value)
+
+    def add_vision_use_silu(self, value: bool) -> None:
+        self.add_bool(Keys.ClipVision.USE_SILU, value)
+
+    def add_vision_projector_scale_factor(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.Projector.SCALE_FACTOR, value)
+
+    def add_vision_n_wa_pattern(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.N_WA_PATTERN, value)
+
+    # audio models
+
+    def add_audio_projection_dim(self, value: int) -> None:
+        self.add_uint32(Keys.ClipAudio.PROJECTION_DIM, value)
+
+    def add_audio_embedding_length(self, value: int) -> None:
+        self.add_uint32(Keys.ClipAudio.EMBEDDING_LENGTH, value)
+
+    def add_audio_feed_forward_length(self, value: int) -> None:
+        self.add_uint32(Keys.ClipAudio.FEED_FORWARD_LENGTH, value)
+
+    def add_audio_block_count(self, value: int) -> None:
+        self.add_uint32(Keys.ClipAudio.BLOCK_COUNT, value)
+
+    def add_audio_head_count(self, value: int) -> None:
+        self.add_uint32(Keys.ClipAudio.Attention.HEAD_COUNT, value)
+
+    def add_audio_attention_layernorm_eps(self, value: float) -> None:
+        self.add_float32(Keys.ClipAudio.Attention.LAYERNORM_EPS, value)
+
+    def add_audio_num_mel_bins(self, value: int) -> None:
+        self.add_uint32(Keys.ClipAudio.NUM_MEL_BINS, value)
+
+    def add_audio_stack_factor(self, value: int) -> None:
+        self.add_uint32(Keys.ClipAudio.Projector.STACK_FACTOR, value)
+
     def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
         pack_prefix = ''
         if not skip_pack_prefix:
             pack_prefix = '<' if self.endianess == GGUFEndian.LITTLE else '>'
         return struct.pack(f'{pack_prefix}{fmt}', value)
 
-    def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
+    def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool, sub_type: GGUFValueType | None = None) -> bytes:
         kv_data = bytearray()
 
         if add_vtype:
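A hedged sketch of how a converter might drive the new vision setters when emitting an mmproj-style GGUF; all values below are placeholders, not taken from any real model:

from gguf.gguf_writer import GGUFWriter

w = GGUFWriter("mmproj.gguf", arch="clip")  # hypothetical output file
w.add_clip_has_vision_encoder(True)
w.add_clip_projector_type("mlp")            # placeholder projector name
w.add_vision_image_size(336)
w.add_vision_patch_size(14)
w.add_vision_embedding_length(1024)
w.add_vision_head_count(16)
w.add_vision_image_mean([0.481, 0.458, 0.408])
w.add_vision_image_std([0.269, 0.261, 0.276])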
@@ -937,7 +1047,9 @@ class GGUFWriter:
         if len(val) == 0:
             raise ValueError("Invalid GGUF metadata array. Empty array")
 
-        if isinstance(val, bytes):
+        if sub_type is not None:
+            ltype = sub_type
+        elif isinstance(val, bytes):
             ltype = GGUFValueType.UINT8
         else:
             ltype = GGUFValueType.get_type(val[0])
bigdl/cpp/gguf-py/gguf/lazy.py CHANGED
@@ -139,6 +139,16 @@ class LazyBase(ABC, metaclass=LazyMeta):
 
             if isinstance(res, cls._tensor_type):
                 return cls(meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn)
+            elif isinstance(res, tuple) and all(isinstance(t, cls._tensor_type) for t in res):
+                # share the evaluation between lazy tuple elements
+                shared_args: list = [args, None]
+
+                def eager_tuple_element(a: list[Any], i: int = 0, /, **kw) -> LazyBase:
+                    assert len(a) == 2
+                    if a[1] is None:
+                        a[1] = fn(*a[0], **kw)
+                    return a[1][i]
+                return tuple(cls(meta=cls.eager_to_meta(res[i]), args=(shared_args, i), kwargs=kwargs, func=eager_tuple_element) for i in range(len(res)))
             else:
                 del res # not needed
                 # non-tensor return likely relies on the contents of the args
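A hedged sketch of the behavior the new tuple branch enables (illustration only: _wrap_fn is internal API, and np.divmod stands in for any operation returning a tuple of tensors). Each element of the result becomes a lazy tensor, and per the diff's own comment the elements are meant to share a single deferred evaluation:

import numpy as np
from gguf.lazy import LazyNumpyTensor

lazy = LazyNumpyTensor.from_eager(np.arange(6, dtype=np.int64))
# np.divmod returns a tuple of two arrays; previously this fell through
# to the non-tensor branch and lost laziness
quot, rem = LazyNumpyTensor._wrap_fn(np.divmod)(lazy, 4)
print(LazyNumpyTensor.to_eager(quot))  # [0 0 0 0 1 1]
print(LazyNumpyTensor.to_eager(rem))   # [0 1 2 3 0 1]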
bigdl/cpp/gguf-py/gguf/metadata.py CHANGED
@@ -121,19 +121,39 @@ class Metadata:
         if not model_card_path.is_file():
             return {}
 
-        # The model card metadata is assumed to always be in YAML
+        # The model card metadata is assumed to always be in YAML (frontmatter)
         # ref: https://github.com/huggingface/transformers/blob/a5c642fe7a1f25d3bdcd76991443ba6ff7ee34b2/src/transformers/modelcard.py#L468-L473
+        yaml_content: str = ""
         with open(model_card_path, "r", encoding="utf-8") as f:
-            if f.readline() == "---\n":
-                raw = f.read().partition("---\n")[0]
-                data = yaml.safe_load(raw)
-                if isinstance(data, dict):
-                    return data
+            content = f.read()
+            lines = content.splitlines()
+            lines_yaml = []
+            if len(lines) == 0:
+                # Empty file
+                return {}
+            if len(lines) > 0 and lines[0] != "---":
+                # No frontmatter
+                return {}
+            for line in lines[1:]:
+                if line == "---":
+                    break # End of frontmatter
                 else:
-                    logger.error(f"while reading YAML model card frontmatter, data is {type(data)} instead of dict")
-                    return {}
+                    lines_yaml.append(line)
+            yaml_content = "\n".join(lines_yaml) + "\n"
+
+        # Quick hack to fix the Norway problem
+        # https://hitchdev.com/strictyaml/why/implicit-typing-removed/
+        yaml_content = yaml_content.replace("- no\n", "- \"no\"\n")
+
+        if yaml_content:
+            data = yaml.safe_load(yaml_content)
+            if isinstance(data, dict):
+                return data
             else:
+                logger.error(f"while reading YAML model card frontmatter, data is {type(data)} instead of dict")
                 return {}
+        else:
+            return {}
 
     @staticmethod
     def load_hf_parameters(model_path: Optional[Path] = None) -> dict[str, Any]:
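The new frontmatter parsing, as a standalone hedged sketch (assumes a local README.md with a YAML block delimited by "---" lines; the path is hypothetical):

import yaml

with open("README.md", "r", encoding="utf-8") as f:
    lines = f.read().splitlines()

frontmatter: dict = {}
if lines and lines[0] == "---":
    yaml_lines = []
    for line in lines[1:]:
        if line == "---":
            break  # closing delimiter ends the frontmatter
        yaml_lines.append(line)
    text = "\n".join(yaml_lines) + "\n"
    # same "Norway problem" guard as the diff: keep a bare `- no` a string
    text = text.replace("- no\n", "- \"no\"\n")
    data = yaml.safe_load(text)
    if isinstance(data, dict):
        frontmatter = data
print(frontmatter)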