bigdl-core-cpp 2.7.0b20250629__py3-none-win_amd64.whl → 2.7.0b20250701__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl/cpp/convert_hf_to_gguf.py +1987 -558
- bigdl/cpp/convert_hf_to_gguf_update.py +131 -67
- bigdl/cpp/convert_lora_to_gguf.py +3 -3
- bigdl/cpp/gguf-py/gguf/constants.py +546 -16
- bigdl/cpp/gguf-py/gguf/gguf_reader.py +57 -6
- bigdl/cpp/gguf-py/gguf/gguf_writer.py +119 -7
- bigdl/cpp/gguf-py/gguf/lazy.py +10 -0
- bigdl/cpp/gguf-py/gguf/metadata.py +28 -8
- bigdl/cpp/gguf-py/gguf/tensor_mapping.py +461 -48
- bigdl/cpp/gguf-py/gguf/utility.py +195 -0
- bigdl/cpp/gguf-py/gguf/vocab.py +6 -1
- bigdl/cpp/libs/llama_cpp/ggml-base.dll +0 -0
- bigdl/cpp/libs/llama_cpp/ggml-cpu.dll +0 -0
- bigdl/cpp/libs/llama_cpp/ggml-sycl.dll +0 -0
- bigdl/cpp/libs/llama_cpp/ggml.dll +0 -0
- bigdl/cpp/libs/llama_cpp/llama-batched.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-bench.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-cli.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-embedding.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-gemma3-cli.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-gguf.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-llava-cli.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-lookup.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-ls-sycl-device.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-minicpmv-cli.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-perplexity.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-quantize.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-server.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-simple.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-speculative.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama-tokenize.exe +0 -0
- bigdl/cpp/libs/llama_cpp/llama.dll +0 -0
- bigdl/cpp/libs/ollama/ggml-base.dll +0 -0
- bigdl/cpp/libs/ollama/ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ollama/ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ollama/ggml.dll +0 -0
- bigdl/cpp/libs/ollama/llama.dll +0 -0
- bigdl/cpp/libs/ollama/llava_shared.dll +0 -0
- bigdl/cpp/libs/ollama/mtmd_shared.dll +0 -0
- bigdl/cpp/libs/ollama/ollama-lib.exe +0 -0
- bigdl/cpp/libs/ollama/ollama.exe +0 -0
- {bigdl_core_cpp-2.7.0b20250629.data → bigdl_core_cpp-2.7.0b20250701.data}/scripts/init-ollama.bat +1 -5
- {bigdl_core_cpp-2.7.0b20250629.dist-info → bigdl_core_cpp-2.7.0b20250701.dist-info}/METADATA +1 -1
- bigdl_core_cpp-2.7.0b20250701.dist-info/RECORD +56 -0
- bigdl/cpp/libs/llama_cpp/llava_shared.dll +0 -0
- bigdl_core_cpp-2.7.0b20250629.dist-info/RECORD +0 -56
- {bigdl_core_cpp-2.7.0b20250629.data → bigdl_core_cpp-2.7.0b20250701.data}/scripts/init-llama-cpp.bat +0 -0
- {bigdl_core_cpp-2.7.0b20250629.data → bigdl_core_cpp-2.7.0b20250701.data}/scripts/init-llama-cpp.ps1 +0 -0
- {bigdl_core_cpp-2.7.0b20250629.dist-info → bigdl_core_cpp-2.7.0b20250701.dist-info}/WHEEL +0 -0
- {bigdl_core_cpp-2.7.0b20250629.dist-info → bigdl_core_cpp-2.7.0b20250701.dist-info}/top_level.txt +0 -0
bigdl/cpp/gguf-py/gguf/gguf_reader.py CHANGED
@@ -6,6 +6,7 @@ from __future__ import annotations
 
 import logging
 import os
+import sys
 from collections import OrderedDict
 from typing import Any, Literal, NamedTuple, TypeVar, Union
 
@@ -15,7 +16,6 @@ import numpy.typing as npt
 from .quants import quant_shape_to_byte_shape
 
 if __name__ == "__main__":
-    import sys
     from pathlib import Path
 
     # Allow running file in package as a script.
@@ -28,6 +28,7 @@ from gguf.constants import (
     GGUF_VERSION,
     GGMLQuantizationType,
     GGUFValueType,
+    GGUFEndian,
 )
 
 logger = logging.getLogger(__name__)
@@ -53,6 +54,48 @@ class ReaderField(NamedTuple):
 
     types: list[GGUFValueType] = []
 
+    def contents(self, index_or_slice: int | slice = slice(None)) -> Any:
+        if self.types:
+            to_string = lambda x: str(x.tobytes(), encoding='utf-8') # noqa: E731
+            main_type = self.types[0]
+
+            if main_type == GGUFValueType.ARRAY:
+                sub_type = self.types[-1]
+
+                if sub_type == GGUFValueType.STRING:
+                    indices = self.data[index_or_slice]
+
+                    if isinstance(index_or_slice, int):
+                        return to_string(self.parts[indices]) # type: ignore
+                    else:
+                        return [to_string(self.parts[idx]) for idx in indices] # type: ignore
+                else:
+                    # FIXME: When/if _get_field_parts() support multi-dimensional arrays, this must do so too
+
+                    # Check if it's unsafe to perform slice optimization on data
+                    # if any(True for idx in self.data if len(self.parts[idx]) != 1):
+                    #     optim_slice = slice(None)
+                    # else:
+                    #     optim_slice = index_or_slice
+                    #     index_or_slice = slice(None)
+
+                    # if isinstance(optim_slice, int):
+                    #     return self.parts[self.data[optim_slice]].tolist()[0]
+                    # else:
+                    #     return [pv for idx in self.data[optim_slice] for pv in self.parts[idx].tolist()][index_or_slice]
+
+                    if isinstance(index_or_slice, int):
+                        return self.parts[self.data[index_or_slice]].tolist()[0]
+                    else:
+                        return [pv for idx in self.data[index_or_slice] for pv in self.parts[idx].tolist()]
+
+            if main_type == GGUFValueType.STRING:
+                return to_string(self.parts[-1])
+            else:
+                return self.parts[-1].tolist()[0]
+
+        return None
+
 
 class ReaderTensor(NamedTuple):
     name: str
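The new ReaderField.contents() helper replaces manual indexing through field.parts and field.data. A minimal usage sketch (the file path is hypothetical, and the keys shown only resolve if present in that file):

from gguf.gguf_reader import GGUFReader

reader = GGUFReader("model.gguf")  # hypothetical local GGUF file
arch = reader.fields.get("general.architecture")
if arch is not None:
    print(arch.contents())               # full value; a str for STRING fields
tokens = reader.fields.get("tokenizer.ggml.tokens")
if tokens is not None:
    print(tokens.contents(slice(0, 5)))  # first five entries of a string array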
@@ -101,10 +144,19 @@ class GGUFReader:
             # If we get 0 here that means it's (probably) a GGUF file created for
             # the opposite byte order of the machine this script is running on.
             self.byte_order = 'S'
-            temp_version = temp_version.newbyteorder(self.byte_order)
+            temp_version = temp_version.view(temp_version.dtype.newbyteorder(self.byte_order))
         version = temp_version[0]
         if version not in READER_SUPPORTED_VERSIONS:
             raise ValueError(f'Sorry, file appears to be version {version} which we cannot handle')
+        if sys.byteorder == "little":
+            # Host is little endian
+            host_endian = GGUFEndian.LITTLE
+            swapped_endian = GGUFEndian.BIG
+        else:
+            # Sorry PDP or other weird systems that don't use BE or LE.
+            host_endian = GGUFEndian.BIG
+            swapped_endian = GGUFEndian.LITTLE
+        self.endianess = swapped_endian if self.byte_order == "S" else host_endian
         self.fields: OrderedDict[str, ReaderField] = OrderedDict()
         self.tensors: list[ReaderTensor] = []
         offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32]))
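Context for the temp_version change: ndarray.newbyteorder() was removed in NumPy 2.0, so the code switches to the dtype-level form, which behaves the same on NumPy 1.x and 2.x. A minimal sketch of the pattern:

import numpy as np

arr = np.arange(4, dtype=np.uint32)
# same underlying bytes, reinterpreted with the swapped ('S') byte order
swapped = arr.view(arr.dtype.newbyteorder('S'))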
@@ -146,9 +198,7 @@ class GGUFReader:
         itemsize = int(np.empty([], dtype = dtype).itemsize)
         end_offs = offset + itemsize * count
         arr = self.data[offset:end_offs].view(dtype=dtype)[:count]
-        if override_order is None:
-            return arr
-        return arr.view(arr.dtype.newbyteorder(override_order))
+        return arr.view(arr.dtype.newbyteorder(self.byte_order if override_order is None else override_order))
 
     def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
         if field.name in self.fields:
@@ -190,6 +240,7 @@ class GGUFReader:
             offs += int(alen.nbytes)
             aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
             data_idxs: list[int] = []
+            # FIXME: Handle multi-dimensional arrays properly instead of flattening
             for idx in range(alen[0]):
                 curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0])
                 if idx == 0:
@@ -200,7 +251,7 @@ class GGUFReader:
                 offs += curr_size
             return offs - orig_offs, aparts, data_idxs, types
         # We can't deal with this one.
-        raise ValueError('Unknown/unhandled field type {gtype}')
+        raise ValueError(f'Unknown/unhandled field type {gtype}')
 
     def _get_tensor_info_field(self, orig_offs: int) -> ReaderField:
         offs = orig_offs
bigdl/cpp/gguf-py/gguf/gguf_writer.py CHANGED
@@ -49,6 +49,7 @@ class TensorInfo:
 class GGUFValue:
     value: Any
     type: GGUFValueType
+    sub_type: GGUFValueType | None = None
 
 
 class WriterState(Enum):
@@ -238,7 +239,7 @@ class GGUFWriter:
 
         for key, val in kv_data.items():
             kv_bytes += self._pack_val(key, GGUFValueType.STRING, add_vtype=False)
-            kv_bytes += self._pack_val(val.value, val.type, add_vtype=True)
+            kv_bytes += self._pack_val(val.value, val.type, add_vtype=True, sub_type=val.sub_type)
 
         fout.write(kv_bytes)
 
@@ -268,11 +269,11 @@ class GGUFWriter:
         fout.flush()
         self.state = WriterState.TI_DATA
 
-    def add_key_value(self, key: str, val: Any, vtype: GGUFValueType) -> None:
+    def add_key_value(self, key: str, val: Any, vtype: GGUFValueType, sub_type: GGUFValueType | None = None) -> None:
         if any(key in kv_data for kv_data in self.kv_data):
-            raise ValueError(f'Duplicated key name {key!r}')
+            logger.warning(f'Duplicated key name {key!r}, overwriting it with new value {val!r} of type {vtype.name}')
 
-        self.kv_data[0][key] = GGUFValue(value=val, type=vtype)
+        self.kv_data[0][key] = GGUFValue(value=val, type=vtype, sub_type=sub_type)
 
     def add_uint8(self, key: str, val: int) -> None:
         self.add_key_value(key,val, GGUFValueType.UINT8)
|
@@ -689,6 +690,12 @@ class GGUFWriter:
|
|
689
690
|
def add_value_length(self, length: int) -> None:
|
690
691
|
self.add_uint32(Keys.Attention.VALUE_LENGTH.format(arch=self.arch), length)
|
691
692
|
|
693
|
+
def add_key_length_mla(self, length: int) -> None:
|
694
|
+
self.add_uint32(Keys.Attention.KEY_LENGTH_MLA.format(arch=self.arch), length)
|
695
|
+
|
696
|
+
def add_value_length_mla(self, length: int) -> None:
|
697
|
+
self.add_uint32(Keys.Attention.VALUE_LENGTH_MLA.format(arch=self.arch), length)
|
698
|
+
|
692
699
|
def add_max_alibi_bias(self, bias: float) -> None:
|
693
700
|
self.add_float32(Keys.Attention.MAX_ALIBI_BIAS.format(arch=self.arch), bias)
|
694
701
|
|
@@ -722,6 +729,9 @@ class GGUFWriter:
     def add_expert_gating_func(self, value: ExpertGatingFuncType) -> None:
         self.add_uint32(Keys.LLM.EXPERT_GATING_FUNC.format(arch=self.arch), value.value)
 
+    def add_moe_every_n_layers(self, value: int) -> None:
+        self.add_uint32(Keys.LLM.MOE_EVERY_N_LAYERS.format(arch=self.arch), value)
+
     def add_swin_norm(self, value: bool) -> None:
         self.add_bool(Keys.LLM.SWIN_NORM.format(arch=self.arch), value)
 
@@ -746,6 +756,9 @@ class GGUFWriter:
     def add_token_shift_count(self, count: int) -> None:
         self.add_uint32(Keys.LLM.TOKEN_SHIFT_COUNT.format(arch=self.arch), count)
 
+    def add_interleave_moe_layer_step(self, value: int) -> None:
+        self.add_uint32(Keys.LLM.INTERLEAVE_MOE_LAYER_STEP.format(arch=self.arch), value)
+
     def add_layer_norm_eps(self, value: float) -> None:
         self.add_float32(Keys.Attention.LAYERNORM_EPS.format(arch=self.arch), value)
 
@@ -767,6 +780,18 @@ class GGUFWriter:
     def add_kv_lora_rank(self, length: int) -> None:
         self.add_uint32(Keys.Attention.KV_LORA_RANK.format(arch=self.arch), length)
 
+    def add_decay_lora_rank(self, length: int) -> None:
+        self.add_uint32(Keys.Attention.DECAY_LORA_RANK.format(arch=self.arch), length)
+
+    def add_iclr_lora_rank(self, length: int) -> None:
+        self.add_uint32(Keys.Attention.ICLR_LORA_RANK.format(arch=self.arch), length)
+
+    def add_value_residual_mix_lora_rank(self, length: int) -> None:
+        self.add_uint32(Keys.Attention.VALUE_RESIDUAL_MIX_LORA_RANK.format(arch=self.arch), length)
+
+    def add_gate_lora_rank(self, length: int) -> None:
+        self.add_uint32(Keys.Attention.GATE_LORA_RANK.format(arch=self.arch), length)
+
     def add_relative_attn_buckets_count(self, value: int) -> None:
         self.add_uint32(Keys.Attention.REL_BUCKETS_COUNT.format(arch=self.arch), value)
 
@@ -872,7 +897,7 @@ class GGUFWriter:
     def add_remove_extra_whitespaces(self, value: bool) -> None:
         self.add_bool(Keys.Tokenizer.REMOVE_EXTRA_WS, value)
 
-    def add_precompiled_charsmap(self, charsmap: Sequence[bytes]) -> None:
+    def add_precompiled_charsmap(self, charsmap: bytes) -> None:
         self.add_array(Keys.Tokenizer.PRECOMPILED_CHARSMAP, charsmap)
 
     def add_chat_template(self, value: str | Sequence[Mapping[str, str]]) -> None:
@@ -910,13 +935,98 @@ class GGUFWriter:
     def add_eom_token_id(self, id: int) -> None:
         self.add_uint32(Keys.Tokenizer.EOM_ID, id)
 
+    def add_classifier_output_labels(self, labels: Sequence[str]) -> None:
+        self.add_array(Keys.Classifier.OUTPUT_LABELS.format(arch=self.arch), labels)
+
+    # for vision models
+
+    def add_clip_has_vision_encoder(self, value: bool) -> None:
+        self.add_bool(Keys.Clip.HAS_VISION_ENCODER, value)
+
+    def add_clip_has_audio_encoder(self, value: bool) -> None:
+        self.add_bool(Keys.Clip.HAS_AUDIO_ENCODER, value)
+
+    def add_clip_projector_type(self, value: str) -> None:
+        self.add_string(Keys.Clip.PROJECTOR_TYPE, value)
+
+    def add_vision_projection_dim(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.PROJECTION_DIM, value)
+
+    def add_vision_patch_size(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.PATCH_SIZE, value)
+
+    def add_vision_embedding_length(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.EMBEDDING_LENGTH, value)
+
+    def add_vision_feed_forward_length(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.FEED_FORWARD_LENGTH, value)
+
+    def add_vision_block_count(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.BLOCK_COUNT, value)
+
+    def add_vision_head_count(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.Attention.HEAD_COUNT, value)
+
+    def add_vision_attention_layernorm_eps(self, value: float) -> None:
+        self.add_float32(Keys.ClipVision.Attention.LAYERNORM_EPS, value)
+
+    def add_vision_image_size(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.IMAGE_SIZE, value)
+
+    def add_vision_image_mean(self, values: Sequence[float]) -> None:
+        self.add_array(Keys.ClipVision.IMAGE_MEAN, values)
+
+    def add_vision_image_std(self, values: Sequence[float]) -> None:
+        self.add_array(Keys.ClipVision.IMAGE_STD, values)
+
+    def add_vision_spatial_merge_size(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.SPATIAL_MERGE_SIZE, value)
+
+    def add_vision_use_gelu(self, value: bool) -> None:
+        self.add_bool(Keys.ClipVision.USE_GELU, value)
+
+    def add_vision_use_silu(self, value: bool) -> None:
+        self.add_bool(Keys.ClipVision.USE_SILU, value)
+
+    def add_vision_projector_scale_factor(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.Projector.SCALE_FACTOR, value)
+
+    def add_vision_n_wa_pattern(self, value: int) -> None:
+        self.add_uint32(Keys.ClipVision.N_WA_PATTERN, value)
+
+    # audio models
+
+    def add_audio_projection_dim(self, value: int) -> None:
+        self.add_uint32(Keys.ClipAudio.PROJECTION_DIM, value)
+
+    def add_audio_embedding_length(self, value: int) -> None:
+        self.add_uint32(Keys.ClipAudio.EMBEDDING_LENGTH, value)
+
+    def add_audio_feed_forward_length(self, value: int) -> None:
+        self.add_uint32(Keys.ClipAudio.FEED_FORWARD_LENGTH, value)
+
+    def add_audio_block_count(self, value: int) -> None:
+        self.add_uint32(Keys.ClipAudio.BLOCK_COUNT, value)
+
+    def add_audio_head_count(self, value: int) -> None:
+        self.add_uint32(Keys.ClipAudio.Attention.HEAD_COUNT, value)
+
+    def add_audio_attention_layernorm_eps(self, value: float) -> None:
+        self.add_float32(Keys.ClipAudio.Attention.LAYERNORM_EPS, value)
+
+    def add_audio_num_mel_bins(self, value: int) -> None:
+        self.add_uint32(Keys.ClipAudio.NUM_MEL_BINS, value)
+
+    def add_audio_stack_factor(self, value: int) -> None:
+        self.add_uint32(Keys.ClipAudio.Projector.STACK_FACTOR, value)
+
     def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
         pack_prefix = ''
         if not skip_pack_prefix:
             pack_prefix = '<' if self.endianess == GGUFEndian.LITTLE else '>'
         return struct.pack(f'{pack_prefix}{fmt}', value)
 
-    def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool) -> bytes:
+    def _pack_val(self, val: Any, vtype: GGUFValueType, add_vtype: bool, sub_type: GGUFValueType | None = None) -> bytes:
         kv_data = bytearray()
 
         if add_vtype:
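A hedged sketch of how a converter might drive the new vision setters when emitting an mmproj-style GGUF; all values below are placeholders, not taken from any real model:

from gguf.gguf_writer import GGUFWriter

w = GGUFWriter("mmproj.gguf", arch="clip")  # hypothetical output file
w.add_clip_has_vision_encoder(True)
w.add_clip_projector_type("mlp")            # placeholder projector name
w.add_vision_image_size(336)
w.add_vision_patch_size(14)
w.add_vision_embedding_length(1024)
w.add_vision_head_count(16)
w.add_vision_image_mean([0.481, 0.458, 0.408])
w.add_vision_image_std([0.269, 0.261, 0.276])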
@@ -937,7 +1047,9 @@ class GGUFWriter:
         if len(val) == 0:
             raise ValueError("Invalid GGUF metadata array. Empty array")
 
-        if isinstance(val, bytes):
+        if sub_type is not None:
+            ltype = sub_type
+        elif isinstance(val, bytes):
             ltype = GGUFValueType.UINT8
         else:
             ltype = GGUFValueType.get_type(val[0])
bigdl/cpp/gguf-py/gguf/lazy.py CHANGED
@@ -139,6 +139,16 @@ class LazyBase(ABC, metaclass=LazyMeta):
 
             if isinstance(res, cls._tensor_type):
                 return cls(meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn)
+            elif isinstance(res, tuple) and all(isinstance(t, cls._tensor_type) for t in res):
+                # share the evaluation between lazy tuple elements
+                shared_args: list = [args, None]
+
+                def eager_tuple_element(a: list[Any], i: int = 0, /, **kw) -> LazyBase:
+                    assert len(a) == 2
+                    if a[1] is None:
+                        a[1] = fn(*a[0], **kw)
+                    return a[1][i]
+                return tuple(cls(meta=cls.eager_to_meta(res[i]), args=(shared_args, i), kwargs=kwargs, func=eager_tuple_element) for i in range(len(res)))
             else:
                 del res # not needed
                 # non-tensor return likely relies on the contents of the args
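A hedged sketch of the behavior the new tuple branch enables (illustration only: _wrap_fn is internal API, and np.divmod stands in for any operation returning a tuple of tensors). Each element of the result becomes a lazy tensor, and per the diff's own comment the elements are meant to share a single deferred evaluation:

import numpy as np
from gguf.lazy import LazyNumpyTensor

lazy = LazyNumpyTensor.from_eager(np.arange(6, dtype=np.int64))
# np.divmod returns a tuple of two arrays; previously this fell through
# to the non-tensor branch and lost laziness
quot, rem = LazyNumpyTensor._wrap_fn(np.divmod)(lazy, 4)
print(LazyNumpyTensor.to_eager(quot))  # [0 0 0 0 1 1]
print(LazyNumpyTensor.to_eager(rem))   # [0 1 2 3 0 1]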
bigdl/cpp/gguf-py/gguf/metadata.py CHANGED
@@ -121,19 +121,39 @@ class Metadata:
         if not model_card_path.is_file():
             return {}
 
-        # The model card metadata is assumed to always be in YAML
+        # The model card metadata is assumed to always be in YAML (frontmatter)
         # ref: https://github.com/huggingface/transformers/blob/a5c642fe7a1f25d3bdcd76991443ba6ff7ee34b2/src/transformers/modelcard.py#L468-L473
+        yaml_content: str = ""
         with open(model_card_path, "r", encoding="utf-8") as f:
-            if f.readline() == "---\n":
-                raw = f.read().partition("---\n")[0]
-                data = yaml.safe_load(raw)
-                if isinstance(data, dict):
-                    return data
+            content = f.read()
+            lines = content.splitlines()
+            lines_yaml = []
+            if len(lines) == 0:
+                # Empty file
+                return {}
+            if len(lines) > 0 and lines[0] != "---":
+                # No frontmatter
+                return {}
+            for line in lines[1:]:
+                if line == "---":
+                    break # End of frontmatter
                 else:
-                    logger.error(f"while reading YAML model card frontmatter, data is {type(data)} instead of dict")
-                    return {}
+                    lines_yaml.append(line)
+            yaml_content = "\n".join(lines_yaml) + "\n"
+
+        # Quick hack to fix the Norway problem
+        # https://hitchdev.com/strictyaml/why/implicit-typing-removed/
+        yaml_content = yaml_content.replace("- no\n", "- \"no\"\n")
+
+        if yaml_content:
+            data = yaml.safe_load(yaml_content)
+            if isinstance(data, dict):
+                return data
             else:
+                logger.error(f"while reading YAML model card frontmatter, data is {type(data)} instead of dict")
                 return {}
+        else:
+            return {}
 
     @staticmethod
     def load_hf_parameters(model_path: Optional[Path] = None) -> dict[str, Any]:
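The new frontmatter parsing, as a standalone hedged sketch (assumes a local README.md with a YAML block delimited by "---" lines; the path is hypothetical):

import yaml

with open("README.md", "r", encoding="utf-8") as f:
    lines = f.read().splitlines()

frontmatter: dict = {}
if lines and lines[0] == "---":
    yaml_lines = []
    for line in lines[1:]:
        if line == "---":
            break  # closing delimiter ends the frontmatter
        yaml_lines.append(line)
    text = "\n".join(yaml_lines) + "\n"
    # same "Norway problem" guard as the diff: keep a bare `- no` a string
    text = text.replace("- no\n", "- \"no\"\n")
    data = yaml.safe_load(text)
    if isinstance(data, dict):
        frontmatter = data
print(frontmatter)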