bigdl-core-cpp 2.5.0b20240827__py3-none-win_amd64.whl → 2.6.0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl/cpp/convert_hf_to_gguf.py +1196 -147
- bigdl/cpp/convert_hf_to_gguf_update.py +69 -42
- bigdl/cpp/convert_llama_ggml_to_gguf.py +0 -4
- bigdl/cpp/convert_lora_to_gguf.py +82 -14
- bigdl/cpp/gguf-py/gguf/constants.py +645 -187
- bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
- bigdl/cpp/gguf-py/gguf/gguf_reader.py +5 -6
- bigdl/cpp/gguf-py/gguf/gguf_writer.py +92 -16
- bigdl/cpp/gguf-py/gguf/lazy.py +0 -1
- bigdl/cpp/gguf-py/gguf/metadata.py +131 -19
- bigdl/cpp/gguf-py/gguf/quants.py +81 -0
- bigdl/cpp/gguf-py/gguf/tensor_mapping.py +249 -38
- bigdl/cpp/gguf-py/gguf/utility.py +1 -1
- bigdl/cpp/gguf-py/gguf/vocab.py +24 -2
- bigdl/cpp/libs/common.lib +0 -0
- bigdl/cpp/libs/ggml-base.dll +0 -0
- bigdl/cpp/libs/ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ggml.dll +0 -0
- bigdl/cpp/libs/libc++.dll +0 -0
- bigdl/cpp/libs/llama-batched.exe +0 -0
- bigdl/cpp/libs/llama-bench.exe +0 -0
- bigdl/cpp/libs/llama-cli.exe +0 -0
- bigdl/cpp/libs/llama-embedding.exe +0 -0
- bigdl/cpp/libs/llama-gemma3-cli.exe +0 -0
- bigdl/cpp/libs/llama-gguf.exe +0 -0
- bigdl/cpp/libs/llama-llava-cli.exe +0 -0
- bigdl/cpp/libs/llama-lookup.exe +0 -0
- bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
- bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
- bigdl/cpp/libs/llama-perplexity.exe +0 -0
- bigdl/cpp/libs/llama-quantize.exe +0 -0
- bigdl/cpp/libs/llama-server.exe +0 -0
- bigdl/cpp/libs/llama-simple.exe +0 -0
- bigdl/cpp/libs/llama-speculative.exe +0 -0
- bigdl/cpp/libs/llama-tokenize.exe +0 -0
- bigdl/cpp/libs/llama.dll +0 -0
- bigdl/cpp/libs/llava_shared.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ollama-lib.exe +0 -0
- bigdl/cpp/libs/ollama.exe +0 -0
- bigdl/cpp/libs/ollama_ggml.dll +0 -0
- bigdl/cpp/libs/ollama_llama.dll +0 -0
- bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
- bigdl_core_cpp-2.6.0.data/scripts/init-ollama.bat +16 -0
- {bigdl_core_cpp-2.5.0b20240827.dist-info → bigdl_core_cpp-2.6.0.dist-info}/METADATA +9 -5
- bigdl_core_cpp-2.6.0.dist-info/RECORD +57 -0
- {bigdl_core_cpp-2.5.0b20240827.dist-info → bigdl_core_cpp-2.6.0.dist-info}/WHEEL +1 -1
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/ggml.dll +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/llama.dll +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/ggml.dll +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/llama.dll +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/ggml.dll +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/llama.dll +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/ollama_llama_server.exe +0 -0
- bigdl_core_cpp-2.5.0b20240827.data/scripts/init-ollama.bat +0 -19
- bigdl_core_cpp-2.5.0b20240827.dist-info/RECORD +0 -54
- {bigdl_core_cpp-2.5.0b20240827.data → bigdl_core_cpp-2.6.0.data}/scripts/init-llama-cpp.bat +0 -0
- {bigdl_core_cpp-2.5.0b20240827.data → bigdl_core_cpp-2.6.0.data}/scripts/init-llama-cpp.ps1 +0 -0
- {bigdl_core_cpp-2.5.0b20240827.dist-info → bigdl_core_cpp-2.6.0.dist-info}/top_level.txt +0 -0
bigdl/cpp/gguf-py/gguf/gguf_reader.py
CHANGED
@@ -145,11 +145,10 @@ class GGUFReader:
|
|
145
145
|
count = int(count)
|
146
146
|
itemsize = int(np.empty([], dtype = dtype).itemsize)
|
147
147
|
end_offs = offset + itemsize * count
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
)
|
148
|
+
arr = self.data[offset:end_offs].view(dtype=dtype)[:count]
|
149
|
+
if override_order is None:
|
150
|
+
return arr
|
151
|
+
return arr.view(arr.dtype.newbyteorder(override_order))
|
153
152
|
|
154
153
|
def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
|
155
154
|
if field.name in self.fields:
|
@@ -314,4 +313,4 @@ class GGUFReader:
|
|
314
313
|
data = self._get(data_offs, item_type, item_count).reshape(np_dims),
|
315
314
|
field = field,
|
316
315
|
))
|
317
|
-
self.tensors = tensors
|
316
|
+
self.tensors = tensors
|
bigdl/cpp/gguf-py/gguf/gguf_writer.py
CHANGED
@@ -26,6 +26,7 @@ from .constants import (
|
|
26
26
|
RopeScalingType,
|
27
27
|
PoolingType,
|
28
28
|
TokenType,
|
29
|
+
ExpertGatingFuncType,
|
29
30
|
)
|
30
31
|
|
31
32
|
from .quants import quant_shape_from_byte_shape
|
@@ -568,6 +569,9 @@ class GGUFWriter:
|
|
568
569
|
def add_base_model_organization(self, source_id: int, organization: str) -> None:
|
569
570
|
self.add_string(Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization)
|
570
571
|
|
572
|
+
def add_base_model_description(self, source_id: int, description: str) -> None:
|
573
|
+
self.add_string(Keys.General.BASE_MODEL_DESCRIPTION.format(id=source_id), description)
|
574
|
+
|
571
575
|
def add_base_model_url(self, source_id: int, url: str) -> None:
|
572
576
|
self.add_string(Keys.General.BASE_MODEL_URL.format(id=source_id), url)
|
573
577
|
|
@@ -580,15 +584,42 @@ class GGUFWriter:
|
|
580
584
|
def add_base_model_repo_url(self, source_id: int, repo_url: str) -> None:
|
581
585
|
self.add_string(Keys.General.BASE_MODEL_REPO_URL.format(id=source_id), repo_url)
|
582
586
|
|
587
|
+
def add_dataset_count(self, source_count: int) -> None:
|
588
|
+
self.add_uint32(Keys.General.DATASET_COUNT, source_count)
|
589
|
+
|
590
|
+
def add_dataset_name(self, source_id: int, name: str) -> None:
|
591
|
+
self.add_string(Keys.General.DATASET_NAME.format(id=source_id), name)
|
592
|
+
|
593
|
+
def add_dataset_author(self, source_id: int, author: str) -> None:
|
594
|
+
self.add_string(Keys.General.DATASET_AUTHOR.format(id=source_id), author)
|
595
|
+
|
596
|
+
def add_dataset_version(self, source_id: int, version: str) -> None:
|
597
|
+
self.add_string(Keys.General.DATASET_VERSION.format(id=source_id), version)
|
598
|
+
|
599
|
+
def add_dataset_organization(self, source_id: int, organization: str) -> None:
|
600
|
+
self.add_string(Keys.General.DATASET_ORGANIZATION.format(id=source_id), organization)
|
601
|
+
|
602
|
+
def add_dataset_description(self, source_id: int, description: str) -> None:
|
603
|
+
self.add_string(Keys.General.DATASET_DESCRIPTION.format(id=source_id), description)
|
604
|
+
|
605
|
+
def add_dataset_url(self, source_id: int, url: str) -> None:
|
606
|
+
self.add_string(Keys.General.DATASET_URL.format(id=source_id), url)
|
607
|
+
|
608
|
+
def add_dataset_doi(self, source_id: int, doi: str) -> None:
|
609
|
+
self.add_string(Keys.General.DATASET_DOI.format(id=source_id), doi)
|
610
|
+
|
611
|
+
def add_dataset_uuid(self, source_id: int, uuid: str) -> None:
|
612
|
+
self.add_string(Keys.General.DATASET_UUID.format(id=source_id), uuid)
|
613
|
+
|
614
|
+
def add_dataset_repo_url(self, source_id: int, repo_url: str) -> None:
|
615
|
+
self.add_string(Keys.General.DATASET_REPO_URL.format(id=source_id), repo_url)
|
616
|
+
|
583
617
|
def add_tags(self, tags: Sequence[str]) -> None:
|
584
618
|
self.add_array(Keys.General.TAGS, tags)
|
585
619
|
|
586
620
|
def add_languages(self, languages: Sequence[str]) -> None:
|
587
621
|
self.add_array(Keys.General.LANGUAGES, languages)
|
588
622
|
|
589
|
-
def add_datasets(self, datasets: Sequence[str]) -> None:
|
590
|
-
self.add_array(Keys.General.DATASETS, datasets)
|
591
|
-
|
592
623
|
def add_tensor_data_layout(self, layout: str) -> None:
|
593
624
|
self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
|
594
625
|
|
@@ -601,6 +632,21 @@ class GGUFWriter:
|
|
601
632
|
def add_embedding_length(self, length: int) -> None:
|
602
633
|
self.add_uint32(Keys.LLM.EMBEDDING_LENGTH.format(arch=self.arch), length)
|
603
634
|
|
635
|
+
def add_features_length(self, length: int) -> None:
|
636
|
+
self.add_uint32(Keys.LLM.FEATURES_LENGTH.format(arch=self.arch), length)
|
637
|
+
|
638
|
+
def add_posnet_embedding_length(self, length: int) -> None:
|
639
|
+
self.add_uint32(Keys.PosNet.EMBEDDING_LENGTH.format(arch=self.arch), length)
|
640
|
+
|
641
|
+
def add_posnet_block_count(self, length: int) -> None:
|
642
|
+
self.add_uint32(Keys.PosNet.BLOCK_COUNT.format(arch=self.arch), length)
|
643
|
+
|
644
|
+
def add_convnext_embedding_length(self, length: int) -> None:
|
645
|
+
self.add_uint32(Keys.ConvNext.EMBEDDING_LENGTH.format(arch=self.arch), length)
|
646
|
+
|
647
|
+
def add_convnext_block_count(self, length: int) -> None:
|
648
|
+
self.add_uint32(Keys.ConvNext.BLOCK_COUNT.format(arch=self.arch), length)
|
649
|
+
|
604
650
|
def add_block_count(self, length: int) -> None:
|
605
651
|
self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)
|
606
652
|
|
@@ -670,12 +716,48 @@ class GGUFWriter:
|
|
670
716
|
def add_expert_weights_scale(self, value: float) -> None:
|
671
717
|
self.add_float32(Keys.LLM.EXPERT_WEIGHTS_SCALE.format(arch=self.arch), value)
|
672
718
|
|
719
|
+
def add_expert_weights_norm(self, value: bool) -> None:
|
720
|
+
self.add_bool(Keys.LLM.EXPERT_WEIGHTS_NORM.format(arch=self.arch), value)
|
721
|
+
|
722
|
+
def add_expert_gating_func(self, value: ExpertGatingFuncType) -> None:
|
723
|
+
self.add_uint32(Keys.LLM.EXPERT_GATING_FUNC.format(arch=self.arch), value.value)
|
724
|
+
|
725
|
+
def add_swin_norm(self, value: bool) -> None:
|
726
|
+
self.add_bool(Keys.LLM.SWIN_NORM.format(arch=self.arch), value)
|
727
|
+
|
728
|
+
def add_rescale_every_n_layers(self, count: int) -> None:
|
729
|
+
self.add_uint32(Keys.LLM.RESCALE_EVERY_N_LAYERS.format(arch=self.arch), count)
|
730
|
+
|
731
|
+
def add_time_mix_extra_dim(self, dim: int) -> None:
|
732
|
+
self.add_uint32(Keys.LLM.TIME_MIX_EXTRA_DIM.format(arch=self.arch), dim)
|
733
|
+
|
734
|
+
def add_time_decay_extra_dim(self, dim: int) -> None:
|
735
|
+
self.add_uint32(Keys.LLM.TIME_DECAY_EXTRA_DIM.format(arch=self.arch), dim)
|
736
|
+
|
737
|
+
def add_residual_scale(self, value: float) -> None:
|
738
|
+
self.add_float32(Keys.LLM.RESIDUAL_SCALE.format(arch=self.arch), value)
|
739
|
+
|
740
|
+
def add_embedding_scale(self, value: float) -> None:
|
741
|
+
self.add_float32(Keys.LLM.EMBEDDING_SCALE.format(arch=self.arch), value)
|
742
|
+
|
743
|
+
def add_wkv_head_size(self, size: int) -> None:
|
744
|
+
self.add_uint32(Keys.WKV.HEAD_SIZE.format(arch=self.arch), size)
|
745
|
+
|
746
|
+
def add_token_shift_count(self, count: int) -> None:
|
747
|
+
self.add_uint32(Keys.LLM.TOKEN_SHIFT_COUNT.format(arch=self.arch), count)
|
748
|
+
|
673
749
|
def add_layer_norm_eps(self, value: float) -> None:
|
674
750
|
self.add_float32(Keys.Attention.LAYERNORM_EPS.format(arch=self.arch), value)
|
675
751
|
|
676
752
|
def add_layer_norm_rms_eps(self, value: float) -> None:
|
677
753
|
self.add_float32(Keys.Attention.LAYERNORM_RMS_EPS.format(arch=self.arch), value)
|
678
754
|
|
755
|
+
def add_group_norm_eps(self, value: float) -> None:
|
756
|
+
self.add_float32(Keys.Attention.GROUPNORM_EPS.format(arch=self.arch), value)
|
757
|
+
|
758
|
+
def add_group_norm_groups(self, value: int) -> None:
|
759
|
+
self.add_uint32(Keys.Attention.GROUPNORM_GROUPS.format(arch=self.arch), value)
|
760
|
+
|
679
761
|
def add_causal_attention(self, value: bool) -> None:
|
680
762
|
self.add_bool(Keys.Attention.CAUSAL.format(arch=self.arch), value)
|
681
763
|
|
@@ -691,12 +773,18 @@ class GGUFWriter:
|
|
691
773
|
def add_sliding_window(self, value: int) -> None:
|
692
774
|
self.add_uint32(Keys.Attention.SLIDING_WINDOW.format(arch=self.arch), value)
|
693
775
|
|
776
|
+
def add_attention_scale(self, value: float) -> None:
|
777
|
+
self.add_float32(Keys.Attention.SCALE.format(arch=self.arch), value)
|
778
|
+
|
694
779
|
def add_pooling_type(self, value: PoolingType) -> None:
|
695
780
|
self.add_uint32(Keys.LLM.POOLING_TYPE.format(arch=self.arch), value.value)
|
696
781
|
|
697
782
|
def add_rope_dimension_count(self, count: int) -> None:
|
698
783
|
self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count)
|
699
784
|
|
785
|
+
def add_rope_dimension_sections(self, dims: Sequence[int]) -> None:
|
786
|
+
self.add_array(Keys.Rope.DIMENSION_SECTIONS.format(arch=self.arch), dims)
|
787
|
+
|
700
788
|
def add_rope_freq_base(self, value: float) -> None:
|
701
789
|
self.add_float32(Keys.Rope.FREQ_BASE.format(arch=self.arch), value)
|
702
790
|
|
@@ -769,9 +857,6 @@ class GGUFWriter:
|
|
769
857
|
def add_pad_token_id(self, id: int) -> None:
|
770
858
|
self.add_uint32(Keys.Tokenizer.PAD_ID, id)
|
771
859
|
|
772
|
-
def add_cls_token_id(self, id: int) -> None:
|
773
|
-
self.add_uint32(Keys.Tokenizer.CLS_ID, id)
|
774
|
-
|
775
860
|
def add_mask_token_id(self, id: int) -> None:
|
776
861
|
self.add_uint32(Keys.Tokenizer.MASK_ID, id)
|
777
862
|
|
@@ -819,15 +904,6 @@ class GGUFWriter:
|
|
819
904
|
|
820
905
|
self.add_string(Keys.Tokenizer.CHAT_TEMPLATE, value)
|
821
906
|
|
822
|
-
def add_prefix_token_id(self, id: int) -> None:
|
823
|
-
self.add_uint32(Keys.Tokenizer.PREFIX_ID, id)
|
824
|
-
|
825
|
-
def add_suffix_token_id(self, id: int) -> None:
|
826
|
-
self.add_uint32(Keys.Tokenizer.SUFFIX_ID, id)
|
827
|
-
|
828
|
-
def add_middle_token_id(self, id: int) -> None:
|
829
|
-
self.add_uint32(Keys.Tokenizer.MIDDLE_ID, id)
|
830
|
-
|
831
907
|
def add_eot_token_id(self, id: int) -> None:
|
832
908
|
self.add_uint32(Keys.Tokenizer.EOT_ID, id)
|
833
909
|
|
@@ -885,4 +961,4 @@ class GGUFWriter:
|
|
885
961
|
if abs(fnum) < 1000.0:
|
886
962
|
return f"{fnum:3.1f}{unit}"
|
887
963
|
fnum /= 1000.0
|
888
|
-
return f"{fnum:.1f}T - over 1TB, split recommended"
|
964
|
+
return f"{fnum:.1f}T - over 1TB, split recommended"
|
bigdl/cpp/gguf-py/gguf/metadata.py
CHANGED
@@ -41,7 +41,7 @@ class Metadata:
|
|
41
41
|
base_models: Optional[list[dict]] = None
|
42
42
|
tags: Optional[list[str]] = None
|
43
43
|
languages: Optional[list[str]] = None
|
44
|
-
datasets: Optional[list[str]] = None
|
44
|
+
datasets: Optional[list[dict]] = None
|
45
45
|
|
46
46
|
@staticmethod
|
47
47
|
def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None, total_params: int = 0) -> Metadata:
|
@@ -91,9 +91,11 @@ class Metadata:
|
|
91
91
|
# Base Models is received here as an array of models
|
92
92
|
metadata.base_models = metadata_override.get("general.base_models", metadata.base_models)
|
93
93
|
|
94
|
+
# Datasets is received here as an array of datasets
|
95
|
+
metadata.datasets = metadata_override.get("general.datasets", metadata.datasets)
|
96
|
+
|
94
97
|
metadata.tags = metadata_override.get(Keys.General.TAGS, metadata.tags)
|
95
98
|
metadata.languages = metadata_override.get(Keys.General.LANGUAGES, metadata.languages)
|
96
|
-
metadata.datasets = metadata_override.get(Keys.General.DATASETS, metadata.datasets)
|
97
99
|
|
98
100
|
# Direct Metadata Override (via direct cli argument)
|
99
101
|
if model_name is not None:
|
@@ -346,12 +348,12 @@ class Metadata:
|
|
346
348
|
use_model_card_metadata("author", "model_creator")
|
347
349
|
use_model_card_metadata("basename", "model_type")
|
348
350
|
|
349
|
-
if "base_model" in model_card:
|
351
|
+
if "base_model" in model_card or "base_models" in model_card or "base_model_sources" in model_card:
|
350
352
|
# This represents the parent models that this is based on
|
351
353
|
# Example: stabilityai/stable-diffusion-xl-base-1.0. Can also be a list (for merges)
|
352
354
|
# Example of merges: https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0.1/blob/main/README.md
|
353
355
|
metadata_base_models = []
|
354
|
-
base_model_value = model_card.get("base_model", None)
|
356
|
+
base_model_value = model_card.get("base_model", model_card.get("base_models", model_card.get("base_model_sources", None)))
|
355
357
|
|
356
358
|
if base_model_value is not None:
|
357
359
|
if isinstance(base_model_value, str):
|
@@ -364,18 +366,106 @@ class Metadata:
|
|
364
366
|
|
365
367
|
for model_id in metadata_base_models:
|
366
368
|
# NOTE: model size of base model is assumed to be similar to the size of the current model
|
367
|
-
model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
|
368
369
|
base_model = {}
|
369
|
-
if
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
370
|
+
if isinstance(model_id, str):
|
371
|
+
if model_id.startswith("http://") or model_id.startswith("https://") or model_id.startswith("ssh://"):
|
372
|
+
base_model["repo_url"] = model_id
|
373
|
+
|
374
|
+
# Check if Hugging Face ID is present in URL
|
375
|
+
if "huggingface.co" in model_id:
|
376
|
+
match = re.match(r"https?://huggingface.co/([^/]+/[^/]+)$", model_id)
|
377
|
+
if match:
|
378
|
+
model_id_component = match.group(1)
|
379
|
+
model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id_component, total_params)
|
380
|
+
|
381
|
+
# Populate model dictionary with extracted components
|
382
|
+
if model_full_name_component is not None:
|
383
|
+
base_model["name"] = Metadata.id_to_title(model_full_name_component)
|
384
|
+
if org_component is not None:
|
385
|
+
base_model["organization"] = Metadata.id_to_title(org_component)
|
386
|
+
if version is not None:
|
387
|
+
base_model["version"] = version
|
388
|
+
|
389
|
+
else:
|
390
|
+
# Likely a Hugging Face ID
|
391
|
+
model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
|
392
|
+
|
393
|
+
# Populate model dictionary with extracted components
|
394
|
+
if model_full_name_component is not None:
|
395
|
+
base_model["name"] = Metadata.id_to_title(model_full_name_component)
|
396
|
+
if org_component is not None:
|
397
|
+
base_model["organization"] = Metadata.id_to_title(org_component)
|
398
|
+
if version is not None:
|
399
|
+
base_model["version"] = version
|
400
|
+
if org_component is not None and model_full_name_component is not None:
|
401
|
+
base_model["repo_url"] = f"https://huggingface.co/{org_component}/{model_full_name_component}"
|
402
|
+
|
403
|
+
elif isinstance(model_id, dict):
|
404
|
+
base_model = model_id
|
405
|
+
|
406
|
+
else:
|
407
|
+
logger.error(f"base model entry '{str(model_id)}' not in a known format")
|
408
|
+
|
377
409
|
metadata.base_models.append(base_model)
|
378
410
|
|
411
|
+
if "datasets" in model_card or "dataset" in model_card or "dataset_sources" in model_card:
|
412
|
+
# This represents the datasets that this was trained from
|
413
|
+
metadata_datasets = []
|
414
|
+
dataset_value = model_card.get("datasets", model_card.get("dataset", model_card.get("dataset_sources", None)))
|
415
|
+
|
416
|
+
if dataset_value is not None:
|
417
|
+
if isinstance(dataset_value, str):
|
418
|
+
metadata_datasets.append(dataset_value)
|
419
|
+
elif isinstance(dataset_value, list):
|
420
|
+
metadata_datasets.extend(dataset_value)
|
421
|
+
|
422
|
+
if metadata.datasets is None:
|
423
|
+
metadata.datasets = []
|
424
|
+
|
425
|
+
for dataset_id in metadata_datasets:
|
426
|
+
# NOTE: model size of base model is assumed to be similar to the size of the current model
|
427
|
+
dataset = {}
|
428
|
+
if isinstance(dataset_id, str):
|
429
|
+
if dataset_id.startswith(("http://", "https://", "ssh://")):
|
430
|
+
dataset["repo_url"] = dataset_id
|
431
|
+
|
432
|
+
# Check if Hugging Face ID is present in URL
|
433
|
+
if "huggingface.co" in dataset_id:
|
434
|
+
match = re.match(r"https?://huggingface.co/([^/]+/[^/]+)$", dataset_id)
|
435
|
+
if match:
|
436
|
+
dataset_id_component = match.group(1)
|
437
|
+
dataset_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(dataset_id_component, total_params)
|
438
|
+
|
439
|
+
# Populate dataset dictionary with extracted components
|
440
|
+
if dataset_name_component is not None:
|
441
|
+
dataset["name"] = Metadata.id_to_title(dataset_name_component)
|
442
|
+
if org_component is not None:
|
443
|
+
dataset["organization"] = Metadata.id_to_title(org_component)
|
444
|
+
if version is not None:
|
445
|
+
dataset["version"] = version
|
446
|
+
|
447
|
+
else:
|
448
|
+
# Likely a Hugging Face ID
|
449
|
+
dataset_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(dataset_id, total_params)
|
450
|
+
|
451
|
+
# Populate dataset dictionary with extracted components
|
452
|
+
if dataset_name_component is not None:
|
453
|
+
dataset["name"] = Metadata.id_to_title(dataset_name_component)
|
454
|
+
if org_component is not None:
|
455
|
+
dataset["organization"] = Metadata.id_to_title(org_component)
|
456
|
+
if version is not None:
|
457
|
+
dataset["version"] = version
|
458
|
+
if org_component is not None and dataset_name_component is not None:
|
459
|
+
dataset["repo_url"] = f"https://huggingface.co/{org_component}/{dataset_name_component}"
|
460
|
+
|
461
|
+
elif isinstance(dataset_id, dict):
|
462
|
+
dataset = dataset_id
|
463
|
+
|
464
|
+
else:
|
465
|
+
logger.error(f"dataset entry '{str(dataset_id)}' not in a known format")
|
466
|
+
|
467
|
+
metadata.datasets.append(dataset)
|
468
|
+
|
379
469
|
use_model_card_metadata("license", "license")
|
380
470
|
use_model_card_metadata("license_name", "license_name")
|
381
471
|
use_model_card_metadata("license_link", "license_link")
|
@@ -386,9 +476,6 @@ class Metadata:
|
|
386
476
|
use_array_model_card_metadata("languages", "languages")
|
387
477
|
use_array_model_card_metadata("languages", "language")
|
388
478
|
|
389
|
-
use_array_model_card_metadata("datasets", "datasets")
|
390
|
-
use_array_model_card_metadata("datasets", "dataset")
|
391
|
-
|
392
479
|
# Hugging Face Parameter Heuristics
|
393
480
|
####################################
|
394
481
|
|
@@ -458,7 +545,10 @@ class Metadata:
|
|
458
545
|
gguf_writer.add_size_label(self.size_label)
|
459
546
|
|
460
547
|
if self.license is not None:
|
461
|
-
|
548
|
+
if isinstance(self.license, list):
|
549
|
+
gguf_writer.add_license(",".join(self.license))
|
550
|
+
else:
|
551
|
+
gguf_writer.add_license(self.license)
|
462
552
|
if self.license_name is not None:
|
463
553
|
gguf_writer.add_license_name(self.license_name)
|
464
554
|
if self.license_link is not None:
|
@@ -493,6 +583,8 @@ class Metadata:
|
|
493
583
|
gguf_writer.add_base_model_version(key, base_model_entry["version"])
|
494
584
|
if "organization" in base_model_entry:
|
495
585
|
gguf_writer.add_base_model_organization(key, base_model_entry["organization"])
|
586
|
+
if "description" in base_model_entry:
|
587
|
+
gguf_writer.add_base_model_description(key, base_model_entry["description"])
|
496
588
|
if "url" in base_model_entry:
|
497
589
|
gguf_writer.add_base_model_url(key, base_model_entry["url"])
|
498
590
|
if "doi" in base_model_entry:
|
@@ -502,9 +594,29 @@ class Metadata:
|
|
502
594
|
if "repo_url" in base_model_entry:
|
503
595
|
gguf_writer.add_base_model_repo_url(key, base_model_entry["repo_url"])
|
504
596
|
|
597
|
+
if self.datasets is not None:
|
598
|
+
gguf_writer.add_dataset_count(len(self.datasets))
|
599
|
+
for key, dataset_entry in enumerate(self.datasets):
|
600
|
+
if "name" in dataset_entry:
|
601
|
+
gguf_writer.add_dataset_name(key, dataset_entry["name"])
|
602
|
+
if "author" in dataset_entry:
|
603
|
+
gguf_writer.add_dataset_author(key, dataset_entry["author"])
|
604
|
+
if "version" in dataset_entry:
|
605
|
+
gguf_writer.add_dataset_version(key, dataset_entry["version"])
|
606
|
+
if "organization" in dataset_entry:
|
607
|
+
gguf_writer.add_dataset_organization(key, dataset_entry["organization"])
|
608
|
+
if "description" in dataset_entry:
|
609
|
+
gguf_writer.add_dataset_description(key, dataset_entry["description"])
|
610
|
+
if "url" in dataset_entry:
|
611
|
+
gguf_writer.add_dataset_url(key, dataset_entry["url"])
|
612
|
+
if "doi" in dataset_entry:
|
613
|
+
gguf_writer.add_dataset_doi(key, dataset_entry["doi"])
|
614
|
+
if "uuid" in dataset_entry:
|
615
|
+
gguf_writer.add_dataset_uuid(key, dataset_entry["uuid"])
|
616
|
+
if "repo_url" in dataset_entry:
|
617
|
+
gguf_writer.add_dataset_repo_url(key, dataset_entry["repo_url"])
|
618
|
+
|
505
619
|
if self.tags is not None:
|
506
620
|
gguf_writer.add_tags(self.tags)
|
507
621
|
if self.languages is not None:
|
508
622
|
gguf_writer.add_languages(self.languages)
|
509
|
-
if self.datasets is not None:
|
510
|
-
gguf_writer.add_datasets(self.datasets)
|
bigdl/cpp/gguf-py/gguf/quants.py
CHANGED
@@ -574,6 +574,87 @@ class Q6_K(__Quant, qtype=GGMLQuantizationType.Q6_K):
|
|
574
574
|
return (d * q).reshape((n_blocks, QK_K))
|
575
575
|
|
576
576
|
|
577
|
+
class TQ1_0(__Quant, qtype=GGMLQuantizationType.TQ1_0):
|
578
|
+
@classmethod
|
579
|
+
def quantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
|
580
|
+
n_blocks = blocks.shape[0]
|
581
|
+
|
582
|
+
d = abs(blocks).max(axis=-1, keepdims=True)
|
583
|
+
with np.errstate(divide="ignore"):
|
584
|
+
id = np.where(d == 0, 0, 1 / d)
|
585
|
+
qs = np_roundf(blocks * id)
|
586
|
+
qs = (qs.astype(np.int8) + np.int8(1)).astype(np.uint8)
|
587
|
+
|
588
|
+
qs0, qs1, qh = qs[..., :(32 * 5)], qs[..., (32 * 5):(48 * 5)], qs[..., (48 * 5):]
|
589
|
+
qs0 = qs0.reshape((n_blocks, -1, 5, 32)) * np.array([81, 27, 9, 3, 1], dtype=np.uint8).reshape((1, 1, 5, 1))
|
590
|
+
qs0 = np.sum(qs0, axis=-2).reshape((n_blocks, -1))
|
591
|
+
qs1 = qs1.reshape((n_blocks, -1, 5, 16)) * np.array([81, 27, 9, 3, 1], dtype=np.uint8).reshape((1, 1, 5, 1))
|
592
|
+
qs1 = np.sum(qs1, axis=-2).reshape((n_blocks, -1))
|
593
|
+
qh = qh.reshape((n_blocks, -1, 4, 4)) * np.array([81, 27, 9, 3], dtype=np.uint8).reshape((1, 1, 4, 1))
|
594
|
+
qh = np.sum(qh, axis=-2).reshape((n_blocks, -1))
|
595
|
+
qs = np.concatenate([qs0, qs1, qh], axis=-1)
|
596
|
+
qs = (qs.astype(np.uint16) * 256 + (243 - 1)) // 243
|
597
|
+
|
598
|
+
qs = qs.astype(np.uint8)
|
599
|
+
d = d.astype(np.float16).view(np.uint8)
|
600
|
+
|
601
|
+
return np.concatenate([qs, d], axis=-1)
|
602
|
+
|
603
|
+
@classmethod
|
604
|
+
def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
|
605
|
+
n_blocks = blocks.shape[0]
|
606
|
+
|
607
|
+
qs, rest = np.hsplit(blocks, [(QK_K - 4 * QK_K // 64) // 5])
|
608
|
+
qh, d = np.hsplit(rest, [QK_K // 64])
|
609
|
+
|
610
|
+
d = d.view(np.float16).astype(np.float32)
|
611
|
+
|
612
|
+
qs0, qs1 = qs[..., :32], qs[..., 32:]
|
613
|
+
qs0 = qs0.reshape((n_blocks, -1, 1, 32)) * np.array([1, 3, 9, 27, 81], dtype=np.uint8).reshape((1, 1, 5, 1))
|
614
|
+
qs0 = qs0.reshape((n_blocks, -1))
|
615
|
+
qs1 = qs1.reshape((n_blocks, -1, 1, 16)) * np.array([1, 3, 9, 27, 81], dtype=np.uint8).reshape((1, 1, 5, 1))
|
616
|
+
qs1 = qs1.reshape((n_blocks, -1))
|
617
|
+
qh = qh.reshape((n_blocks, -1, 1, 4)) * np.array([1, 3, 9, 27], dtype=np.uint8).reshape((1, 1, 4, 1))
|
618
|
+
qh = qh.reshape((n_blocks, -1))
|
619
|
+
qs = np.concatenate([qs0, qs1, qh], axis=-1)
|
620
|
+
qs = ((qs.astype(np.uint16) * 3) >> 8).astype(np.int8) - np.int8(1)
|
621
|
+
|
622
|
+
return (d * qs.astype(np.float32))
|
623
|
+
|
624
|
+
|
625
|
+
class TQ2_0(__Quant, qtype=GGMLQuantizationType.TQ2_0):
|
626
|
+
@classmethod
|
627
|
+
def quantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
|
628
|
+
n_blocks = blocks.shape[0]
|
629
|
+
|
630
|
+
d = abs(blocks).max(axis=-1, keepdims=True)
|
631
|
+
with np.errstate(divide="ignore"):
|
632
|
+
id = np.where(d == 0, 0, 1 / d)
|
633
|
+
qs = np_roundf(blocks * id)
|
634
|
+
qs = (qs.astype(np.int8) + np.int8(1)).astype(np.uint8)
|
635
|
+
|
636
|
+
qs = qs.reshape((n_blocks, -1, 4, 32)) << np.array([0, 2, 4, 6], dtype=np.uint8).reshape((1, 1, 4, 1))
|
637
|
+
qs = qs[..., 0, :] | qs[..., 1, :] | qs[..., 2, :] | qs[..., 3, :]
|
638
|
+
qs = qs.reshape((n_blocks, -1))
|
639
|
+
|
640
|
+
d = d.astype(np.float16).view(np.uint8)
|
641
|
+
|
642
|
+
return np.concatenate([qs, d], axis=-1)
|
643
|
+
|
644
|
+
@classmethod
|
645
|
+
def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
|
646
|
+
n_blocks = blocks.shape[0]
|
647
|
+
|
648
|
+
qs, d = np.hsplit(blocks, [QK_K // 4])
|
649
|
+
|
650
|
+
d = d.view(np.float16).astype(np.float32)
|
651
|
+
|
652
|
+
qs = qs.reshape((n_blocks, -1, 1, 32)) >> np.array([0, 2, 4, 6], dtype=np.uint8).reshape((1, 1, 4, 1))
|
653
|
+
qs = (qs & 0x03).reshape((n_blocks, -1)).astype(np.int8) - np.int8(1)
|
654
|
+
|
655
|
+
return (d * qs.astype(np.float32))
|
656
|
+
|
657
|
+
|
577
658
|
class IQ2_XXS(__Quant, qtype=GGMLQuantizationType.IQ2_XXS):
|
578
659
|
ksigns: bytes = (
|
579
660
|
b"\x00\x81\x82\x03\x84\x05\x06\x87\x88\x09\x0a\x8b\x0c\x8d\x8e\x0f"
|