bigdl-core-cpp 2.5.0b20240827__py3-none-win_amd64.whl → 2.6.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. bigdl/cpp/convert_hf_to_gguf.py +1196 -147
  2. bigdl/cpp/convert_hf_to_gguf_update.py +69 -42
  3. bigdl/cpp/convert_llama_ggml_to_gguf.py +0 -4
  4. bigdl/cpp/convert_lora_to_gguf.py +82 -14
  5. bigdl/cpp/gguf-py/gguf/constants.py +645 -187
  6. bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
  7. bigdl/cpp/gguf-py/gguf/gguf_reader.py +5 -6
  8. bigdl/cpp/gguf-py/gguf/gguf_writer.py +92 -16
  9. bigdl/cpp/gguf-py/gguf/lazy.py +0 -1
  10. bigdl/cpp/gguf-py/gguf/metadata.py +131 -19
  11. bigdl/cpp/gguf-py/gguf/quants.py +81 -0
  12. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +249 -38
  13. bigdl/cpp/gguf-py/gguf/utility.py +1 -1
  14. bigdl/cpp/gguf-py/gguf/vocab.py +24 -2
  15. bigdl/cpp/libs/common.lib +0 -0
  16. bigdl/cpp/libs/ggml-base.dll +0 -0
  17. bigdl/cpp/libs/ggml-cpu.dll +0 -0
  18. bigdl/cpp/libs/ggml-sycl.dll +0 -0
  19. bigdl/cpp/libs/ggml.dll +0 -0
  20. bigdl/cpp/libs/libc++.dll +0 -0
  21. bigdl/cpp/libs/llama-batched.exe +0 -0
  22. bigdl/cpp/libs/llama-bench.exe +0 -0
  23. bigdl/cpp/libs/llama-cli.exe +0 -0
  24. bigdl/cpp/libs/llama-embedding.exe +0 -0
  25. bigdl/cpp/libs/llama-gemma3-cli.exe +0 -0
  26. bigdl/cpp/libs/llama-gguf.exe +0 -0
  27. bigdl/cpp/libs/llama-llava-cli.exe +0 -0
  28. bigdl/cpp/libs/llama-lookup.exe +0 -0
  29. bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
  30. bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
  31. bigdl/cpp/libs/llama-perplexity.exe +0 -0
  32. bigdl/cpp/libs/llama-quantize.exe +0 -0
  33. bigdl/cpp/libs/llama-server.exe +0 -0
  34. bigdl/cpp/libs/llama-simple.exe +0 -0
  35. bigdl/cpp/libs/llama-speculative.exe +0 -0
  36. bigdl/cpp/libs/llama-tokenize.exe +0 -0
  37. bigdl/cpp/libs/llama.dll +0 -0
  38. bigdl/cpp/libs/llava_shared.dll +0 -0
  39. bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
  40. bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
  41. bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
  42. bigdl/cpp/libs/ollama-lib.exe +0 -0
  43. bigdl/cpp/libs/ollama.exe +0 -0
  44. bigdl/cpp/libs/ollama_ggml.dll +0 -0
  45. bigdl/cpp/libs/ollama_llama.dll +0 -0
  46. bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
  47. bigdl_core_cpp-2.6.0.data/scripts/init-ollama.bat +16 -0
  48. {bigdl_core_cpp-2.5.0b20240827.dist-info → bigdl_core_cpp-2.6.0.dist-info}/METADATA +9 -5
  49. bigdl_core_cpp-2.6.0.dist-info/RECORD +57 -0
  50. {bigdl_core_cpp-2.5.0b20240827.dist-info → bigdl_core_cpp-2.6.0.dist-info}/WHEEL +1 -1
  51. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/ggml.dll +0 -0
  52. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/llama.dll +0 -0
  53. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/ollama_llama_server.exe +0 -0
  54. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/ggml.dll +0 -0
  55. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/llama.dll +0 -0
  56. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/ollama_llama_server.exe +0 -0
  57. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/ggml.dll +0 -0
  58. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/llama.dll +0 -0
  59. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/ollama_llama_server.exe +0 -0
  60. bigdl_core_cpp-2.5.0b20240827.data/scripts/init-ollama.bat +0 -19
  61. bigdl_core_cpp-2.5.0b20240827.dist-info/RECORD +0 -54
  62. {bigdl_core_cpp-2.5.0b20240827.data → bigdl_core_cpp-2.6.0.data}/scripts/init-llama-cpp.bat +0 -0
  63. {bigdl_core_cpp-2.5.0b20240827.data → bigdl_core_cpp-2.6.0.data}/scripts/init-llama-cpp.ps1 +0 -0
  64. {bigdl_core_cpp-2.5.0b20240827.dist-info → bigdl_core_cpp-2.6.0.dist-info}/top_level.txt +0 -0
bigdl/cpp/gguf-py/gguf/gguf.py
@@ -12,4 +12,4 @@ sys.path.insert(0, str(Path(__file__).parent.parent))
 importlib.invalidate_caches()
 import gguf  # noqa: E402
 
-importlib.reload(gguf)
+importlib.reload(gguf)
bigdl/cpp/gguf-py/gguf/gguf_reader.py
@@ -145,11 +145,10 @@ class GGUFReader:
         count = int(count)
         itemsize = int(np.empty([], dtype = dtype).itemsize)
         end_offs = offset + itemsize * count
-        return (
-            self.data[offset:end_offs]
-            .view(dtype = dtype)[:count]
-            .newbyteorder(override_order or self.byte_order)
-        )
+        arr = self.data[offset:end_offs].view(dtype=dtype)[:count]
+        if override_order is None:
+            return arr
+        return arr.view(arr.dtype.newbyteorder(override_order))
 
     def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
         if field.name in self.fields:
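
Side note: the `_get` rewrite above tracks NumPy 2.0, which removed `ndarray.newbyteorder`; the byte-order override now goes through `dtype.newbyteorder` instead. A minimal sketch of the portable pattern (illustrative only, not part of the package):

import numpy as np

raw = np.arange(4, dtype=np.uint32).view(np.uint8)  # raw bytes in host byte order

arr = raw.view(np.uint32)
# NumPy < 2.0 allowed: arr.newbyteorder(">")
# Portable replacement: re-view the array through a byte-swapped dtype.
swapped = arr.view(arr.dtype.newbyteorder(">"))
print(swapped.dtype.byteorder)  # '>'
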
@@ -314,4 +313,4 @@ class GGUFReader:
                 data = self._get(data_offs, item_type, item_count).reshape(np_dims),
                 field = field,
             ))
-        self.tensors = tensors
+        self.tensors = tensors
bigdl/cpp/gguf-py/gguf/gguf_writer.py
@@ -26,6 +26,7 @@ from .constants import (
     RopeScalingType,
     PoolingType,
     TokenType,
+    ExpertGatingFuncType,
 )
 
 from .quants import quant_shape_from_byte_shape
@@ -568,6 +569,9 @@ class GGUFWriter:
     def add_base_model_organization(self, source_id: int, organization: str) -> None:
         self.add_string(Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization)
 
+    def add_base_model_description(self, source_id: int, description: str) -> None:
+        self.add_string(Keys.General.BASE_MODEL_DESCRIPTION.format(id=source_id), description)
+
     def add_base_model_url(self, source_id: int, url: str) -> None:
         self.add_string(Keys.General.BASE_MODEL_URL.format(id=source_id), url)
 
@@ -580,15 +584,42 @@ class GGUFWriter:
     def add_base_model_repo_url(self, source_id: int, repo_url: str) -> None:
         self.add_string(Keys.General.BASE_MODEL_REPO_URL.format(id=source_id), repo_url)
 
+    def add_dataset_count(self, source_count: int) -> None:
+        self.add_uint32(Keys.General.DATASET_COUNT, source_count)
+
+    def add_dataset_name(self, source_id: int, name: str) -> None:
+        self.add_string(Keys.General.DATASET_NAME.format(id=source_id), name)
+
+    def add_dataset_author(self, source_id: int, author: str) -> None:
+        self.add_string(Keys.General.DATASET_AUTHOR.format(id=source_id), author)
+
+    def add_dataset_version(self, source_id: int, version: str) -> None:
+        self.add_string(Keys.General.DATASET_VERSION.format(id=source_id), version)
+
+    def add_dataset_organization(self, source_id: int, organization: str) -> None:
+        self.add_string(Keys.General.DATASET_ORGANIZATION.format(id=source_id), organization)
+
+    def add_dataset_description(self, source_id: int, description: str) -> None:
+        self.add_string(Keys.General.DATASET_DESCRIPTION.format(id=source_id), description)
+
+    def add_dataset_url(self, source_id: int, url: str) -> None:
+        self.add_string(Keys.General.DATASET_URL.format(id=source_id), url)
+
+    def add_dataset_doi(self, source_id: int, doi: str) -> None:
+        self.add_string(Keys.General.DATASET_DOI.format(id=source_id), doi)
+
+    def add_dataset_uuid(self, source_id: int, uuid: str) -> None:
+        self.add_string(Keys.General.DATASET_UUID.format(id=source_id), uuid)
+
+    def add_dataset_repo_url(self, source_id: int, repo_url: str) -> None:
+        self.add_string(Keys.General.DATASET_REPO_URL.format(id=source_id), repo_url)
+
     def add_tags(self, tags: Sequence[str]) -> None:
         self.add_array(Keys.General.TAGS, tags)
 
     def add_languages(self, languages: Sequence[str]) -> None:
         self.add_array(Keys.General.LANGUAGES, languages)
 
-    def add_datasets(self, datasets: Sequence[str]) -> None:
-        self.add_array(Keys.General.DATASETS, datasets)
-
     def add_tensor_data_layout(self, layout: str) -> None:
         self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout)
 
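
The new per-dataset setters mirror the existing add_base_model_* family: the single flat `datasets` array is replaced by a count plus one typed KV per field, keyed by dataset index. A hedged usage sketch (output path and values are made up; header and tensor writing are omitted):

from gguf import GGUFWriter

writer = GGUFWriter("model.gguf", arch="llama")  # hypothetical output path
writer.add_dataset_count(1)
writer.add_dataset_name(0, "OpenOrca")
writer.add_dataset_organization(0, "Open Orca")
writer.add_dataset_repo_url(0, "https://huggingface.co/datasets/Open-Orca/OpenOrca")
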
@@ -601,6 +632,21 @@ class GGUFWriter:
     def add_embedding_length(self, length: int) -> None:
         self.add_uint32(Keys.LLM.EMBEDDING_LENGTH.format(arch=self.arch), length)
 
+    def add_features_length(self, length: int) -> None:
+        self.add_uint32(Keys.LLM.FEATURES_LENGTH.format(arch=self.arch), length)
+
+    def add_posnet_embedding_length(self, length: int) -> None:
+        self.add_uint32(Keys.PosNet.EMBEDDING_LENGTH.format(arch=self.arch), length)
+
+    def add_posnet_block_count(self, length: int) -> None:
+        self.add_uint32(Keys.PosNet.BLOCK_COUNT.format(arch=self.arch), length)
+
+    def add_convnext_embedding_length(self, length: int) -> None:
+        self.add_uint32(Keys.ConvNext.EMBEDDING_LENGTH.format(arch=self.arch), length)
+
+    def add_convnext_block_count(self, length: int) -> None:
+        self.add_uint32(Keys.ConvNext.BLOCK_COUNT.format(arch=self.arch), length)
+
     def add_block_count(self, length: int) -> None:
         self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length)
 
@@ -670,12 +716,48 @@ class GGUFWriter:
     def add_expert_weights_scale(self, value: float) -> None:
         self.add_float32(Keys.LLM.EXPERT_WEIGHTS_SCALE.format(arch=self.arch), value)
 
+    def add_expert_weights_norm(self, value: bool) -> None:
+        self.add_bool(Keys.LLM.EXPERT_WEIGHTS_NORM.format(arch=self.arch), value)
+
+    def add_expert_gating_func(self, value: ExpertGatingFuncType) -> None:
+        self.add_uint32(Keys.LLM.EXPERT_GATING_FUNC.format(arch=self.arch), value.value)
+
+    def add_swin_norm(self, value: bool) -> None:
+        self.add_bool(Keys.LLM.SWIN_NORM.format(arch=self.arch), value)
+
+    def add_rescale_every_n_layers(self, count: int) -> None:
+        self.add_uint32(Keys.LLM.RESCALE_EVERY_N_LAYERS.format(arch=self.arch), count)
+
+    def add_time_mix_extra_dim(self, dim: int) -> None:
+        self.add_uint32(Keys.LLM.TIME_MIX_EXTRA_DIM.format(arch=self.arch), dim)
+
+    def add_time_decay_extra_dim(self, dim: int) -> None:
+        self.add_uint32(Keys.LLM.TIME_DECAY_EXTRA_DIM.format(arch=self.arch), dim)
+
+    def add_residual_scale(self, value: float) -> None:
+        self.add_float32(Keys.LLM.RESIDUAL_SCALE.format(arch=self.arch), value)
+
+    def add_embedding_scale(self, value: float) -> None:
+        self.add_float32(Keys.LLM.EMBEDDING_SCALE.format(arch=self.arch), value)
+
+    def add_wkv_head_size(self, size: int) -> None:
+        self.add_uint32(Keys.WKV.HEAD_SIZE.format(arch=self.arch), size)
+
+    def add_token_shift_count(self, count: int) -> None:
+        self.add_uint32(Keys.LLM.TOKEN_SHIFT_COUNT.format(arch=self.arch), count)
+
     def add_layer_norm_eps(self, value: float) -> None:
         self.add_float32(Keys.Attention.LAYERNORM_EPS.format(arch=self.arch), value)
 
     def add_layer_norm_rms_eps(self, value: float) -> None:
         self.add_float32(Keys.Attention.LAYERNORM_RMS_EPS.format(arch=self.arch), value)
 
+    def add_group_norm_eps(self, value: float) -> None:
+        self.add_float32(Keys.Attention.GROUPNORM_EPS.format(arch=self.arch), value)
+
+    def add_group_norm_groups(self, value: int) -> None:
+        self.add_uint32(Keys.Attention.GROUPNORM_GROUPS.format(arch=self.arch), value)
+
     def add_causal_attention(self, value: bool) -> None:
         self.add_bool(Keys.Attention.CAUSAL.format(arch=self.arch), value)
 
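
All of these follow the established one-setter-per-key pattern; the only non-scalar one is add_expert_gating_func, which serializes the enum's integer value. A hedged sketch (arch and values are illustrative; assumes `ExpertGatingFuncType` in `gguf.constants` matches upstream llama.cpp, where DeepSeek-V3-style MoE gates can be sigmoid rather than softmax):

from gguf import GGUFWriter
from gguf.constants import ExpertGatingFuncType

writer = GGUFWriter("model.gguf", arch="deepseek2")  # hypothetical
writer.add_expert_weights_norm(True)
writer.add_expert_gating_func(ExpertGatingFuncType.SIGMOID)  # stored as uint32 via .value
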
@@ -691,12 +773,18 @@ class GGUFWriter:
     def add_sliding_window(self, value: int) -> None:
         self.add_uint32(Keys.Attention.SLIDING_WINDOW.format(arch=self.arch), value)
 
+    def add_attention_scale(self, value: float) -> None:
+        self.add_float32(Keys.Attention.SCALE.format(arch=self.arch), value)
+
     def add_pooling_type(self, value: PoolingType) -> None:
         self.add_uint32(Keys.LLM.POOLING_TYPE.format(arch=self.arch), value.value)
 
     def add_rope_dimension_count(self, count: int) -> None:
         self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count)
 
+    def add_rope_dimension_sections(self, dims: Sequence[int]) -> None:
+        self.add_array(Keys.Rope.DIMENSION_SECTIONS.format(arch=self.arch), dims)
+
     def add_rope_freq_base(self, value: float) -> None:
         self.add_float32(Keys.Rope.FREQ_BASE.format(arch=self.arch), value)
 
@@ -769,9 +857,6 @@ class GGUFWriter:
     def add_pad_token_id(self, id: int) -> None:
         self.add_uint32(Keys.Tokenizer.PAD_ID, id)
 
-    def add_cls_token_id(self, id: int) -> None:
-        self.add_uint32(Keys.Tokenizer.CLS_ID, id)
-
     def add_mask_token_id(self, id: int) -> None:
         self.add_uint32(Keys.Tokenizer.MASK_ID, id)
 
@@ -819,15 +904,6 @@ class GGUFWriter:
 
         self.add_string(Keys.Tokenizer.CHAT_TEMPLATE, value)
 
-    def add_prefix_token_id(self, id: int) -> None:
-        self.add_uint32(Keys.Tokenizer.PREFIX_ID, id)
-
-    def add_suffix_token_id(self, id: int) -> None:
-        self.add_uint32(Keys.Tokenizer.SUFFIX_ID, id)
-
-    def add_middle_token_id(self, id: int) -> None:
-        self.add_uint32(Keys.Tokenizer.MIDDLE_ID, id)
-
     def add_eot_token_id(self, id: int) -> None:
         self.add_uint32(Keys.Tokenizer.EOT_ID, id)
 
@@ -885,4 +961,4 @@ class GGUFWriter:
             if abs(fnum) < 1000.0:
                 return f"{fnum:3.1f}{unit}"
             fnum /= 1000.0
-        return f"{fnum:.1f}T - over 1TB, split recommended"
+        return f"{fnum:.1f}T - over 1TB, split recommended"
bigdl/cpp/gguf-py/gguf/lazy.py
@@ -210,5 +210,4 @@ class LazyNumpyTensor(LazyBase):
         eager = LazyNumpyTensor.to_eager(self)
         return eager.tofile(*args, **kwargs)
 
-
     # TODO: __array_function__
bigdl/cpp/gguf-py/gguf/metadata.py
@@ -41,7 +41,7 @@ class Metadata:
     base_models: Optional[list[dict]] = None
     tags: Optional[list[str]] = None
    languages: Optional[list[str]] = None
-    datasets: Optional[list[str]] = None
+    datasets: Optional[list[dict]] = None
 
     @staticmethod
     def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None, total_params: int = 0) -> Metadata:
@@ -91,9 +91,11 @@ class Metadata:
         # Base Models is received here as an array of models
         metadata.base_models = metadata_override.get("general.base_models", metadata.base_models)
 
+        # Datasets is received here as an array of datasets
+        metadata.datasets = metadata_override.get("general.datasets", metadata.datasets)
+
         metadata.tags = metadata_override.get(Keys.General.TAGS, metadata.tags)
         metadata.languages = metadata_override.get(Keys.General.LANGUAGES, metadata.languages)
-        metadata.datasets = metadata_override.get(Keys.General.DATASETS, metadata.datasets)
 
         # Direct Metadata Override (via direct cli argument)
         if model_name is not None:
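
With `datasets` now a list of dicts, an override file can carry structured dataset records rather than bare strings. A minimal sketch of writing such a file, assuming it is passed to the convert scripts via their `--metadata` JSON flag (names and URLs are illustrative):

import json

override = {
    "general.datasets": [  # entries are now dicts, not plain strings
        {
            "name": "OpenOrca",
            "organization": "Open Orca",
            "repo_url": "https://huggingface.co/datasets/Open-Orca/OpenOrca",
        }
    ],
}
with open("metadata_override.json", "w") as f:
    json.dump(override, f, indent=2)
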
@@ -346,12 +348,12 @@ class Metadata:
         use_model_card_metadata("author", "model_creator")
         use_model_card_metadata("basename", "model_type")
 
-        if "base_model" in model_card:
+        if "base_model" in model_card or "base_models" in model_card or "base_model_sources" in model_card:
             # This represents the parent models that this is based on
             # Example: stabilityai/stable-diffusion-xl-base-1.0. Can also be a list (for merges)
             # Example of merges: https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0.1/blob/main/README.md
             metadata_base_models = []
-            base_model_value = model_card.get("base_model", None)
+            base_model_value = model_card.get("base_model", model_card.get("base_models", model_card.get("base_model_sources", None)))
 
             if base_model_value is not None:
                 if isinstance(base_model_value, str):
@@ -364,18 +366,106 @@ class Metadata:
 
             for model_id in metadata_base_models:
                 # NOTE: model size of base model is assumed to be similar to the size of the current model
-                model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
                 base_model = {}
-                if model_full_name_component is not None:
-                    base_model["name"] = Metadata.id_to_title(model_full_name_component)
-                if org_component is not None:
-                    base_model["organization"] = Metadata.id_to_title(org_component)
-                if version is not None:
-                    base_model["version"] = version
-                if org_component is not None and model_full_name_component is not None:
-                    base_model["repo_url"] = f"https://huggingface.co/{org_component}/{model_full_name_component}"
+                if isinstance(model_id, str):
+                    if model_id.startswith("http://") or model_id.startswith("https://") or model_id.startswith("ssh://"):
+                        base_model["repo_url"] = model_id
+
+                        # Check if Hugging Face ID is present in URL
+                        if "huggingface.co" in model_id:
+                            match = re.match(r"https?://huggingface.co/([^/]+/[^/]+)$", model_id)
+                            if match:
+                                model_id_component = match.group(1)
+                                model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id_component, total_params)
+
+                                # Populate model dictionary with extracted components
+                                if model_full_name_component is not None:
+                                    base_model["name"] = Metadata.id_to_title(model_full_name_component)
+                                if org_component is not None:
+                                    base_model["organization"] = Metadata.id_to_title(org_component)
+                                if version is not None:
+                                    base_model["version"] = version
+
+                    else:
+                        # Likely a Hugging Face ID
+                        model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params)
+
+                        # Populate model dictionary with extracted components
+                        if model_full_name_component is not None:
+                            base_model["name"] = Metadata.id_to_title(model_full_name_component)
+                        if org_component is not None:
+                            base_model["organization"] = Metadata.id_to_title(org_component)
+                        if version is not None:
+                            base_model["version"] = version
+                        if org_component is not None and model_full_name_component is not None:
+                            base_model["repo_url"] = f"https://huggingface.co/{org_component}/{model_full_name_component}"
+
+                elif isinstance(model_id, dict):
+                    base_model = model_id
+
+                else:
+                    logger.error(f"base model entry '{str(model_id)}' not in a known format")
+
                 metadata.base_models.append(base_model)
 
+        if "datasets" in model_card or "dataset" in model_card or "dataset_sources" in model_card:
+            # This represents the datasets that this was trained from
+            metadata_datasets = []
+            dataset_value = model_card.get("datasets", model_card.get("dataset", model_card.get("dataset_sources", None)))
+
+            if dataset_value is not None:
+                if isinstance(dataset_value, str):
+                    metadata_datasets.append(dataset_value)
+                elif isinstance(dataset_value, list):
+                    metadata_datasets.extend(dataset_value)
+
+            if metadata.datasets is None:
+                metadata.datasets = []
+
+            for dataset_id in metadata_datasets:
+                # NOTE: model size of base model is assumed to be similar to the size of the current model
+                dataset = {}
+                if isinstance(dataset_id, str):
+                    if dataset_id.startswith(("http://", "https://", "ssh://")):
+                        dataset["repo_url"] = dataset_id
+
+                        # Check if Hugging Face ID is present in URL
+                        if "huggingface.co" in dataset_id:
+                            match = re.match(r"https?://huggingface.co/([^/]+/[^/]+)$", dataset_id)
+                            if match:
+                                dataset_id_component = match.group(1)
+                                dataset_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(dataset_id_component, total_params)
+
+                                # Populate dataset dictionary with extracted components
+                                if dataset_name_component is not None:
+                                    dataset["name"] = Metadata.id_to_title(dataset_name_component)
+                                if org_component is not None:
+                                    dataset["organization"] = Metadata.id_to_title(org_component)
+                                if version is not None:
+                                    dataset["version"] = version
+
+                    else:
+                        # Likely a Hugging Face ID
+                        dataset_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(dataset_id, total_params)
+
+                        # Populate dataset dictionary with extracted components
+                        if dataset_name_component is not None:
+                            dataset["name"] = Metadata.id_to_title(dataset_name_component)
+                        if org_component is not None:
+                            dataset["organization"] = Metadata.id_to_title(org_component)
+                        if version is not None:
+                            dataset["version"] = version
+                        if org_component is not None and dataset_name_component is not None:
+                            dataset["repo_url"] = f"https://huggingface.co/{org_component}/{dataset_name_component}"
+
+                elif isinstance(dataset_id, dict):
+                    dataset = dataset_id
+
+                else:
+                    logger.error(f"dataset entry '{str(dataset_id)}' not in a known format")
+
+                metadata.datasets.append(dataset)
+
         use_model_card_metadata("license", "license")
         use_model_card_metadata("license_name", "license_name")
         use_model_card_metadata("license_link", "license_link")
@@ -386,9 +476,6 @@ class Metadata:
         use_array_model_card_metadata("languages", "languages")
         use_array_model_card_metadata("languages", "language")
 
-        use_array_model_card_metadata("datasets", "datasets")
-        use_array_model_card_metadata("datasets", "dataset")
-
         # Hugging Face Parameter Heuristics
         ####################################
 
@@ -458,7 +545,10 @@ class Metadata:
             gguf_writer.add_size_label(self.size_label)
 
         if self.license is not None:
-            gguf_writer.add_license(self.license)
+            if isinstance(self.license, list):
+                gguf_writer.add_license(",".join(self.license))
+            else:
+                gguf_writer.add_license(self.license)
         if self.license_name is not None:
             gguf_writer.add_license_name(self.license_name)
         if self.license_link is not None:
@@ -493,6 +583,8 @@ class Metadata:
                     gguf_writer.add_base_model_version(key, base_model_entry["version"])
                 if "organization" in base_model_entry:
                     gguf_writer.add_base_model_organization(key, base_model_entry["organization"])
+                if "description" in base_model_entry:
+                    gguf_writer.add_base_model_description(key, base_model_entry["description"])
                 if "url" in base_model_entry:
                     gguf_writer.add_base_model_url(key, base_model_entry["url"])
                 if "doi" in base_model_entry:
502
594
  if "repo_url" in base_model_entry:
503
595
  gguf_writer.add_base_model_repo_url(key, base_model_entry["repo_url"])
504
596
 
597
+ if self.datasets is not None:
598
+ gguf_writer.add_dataset_count(len(self.datasets))
599
+ for key, dataset_entry in enumerate(self.datasets):
600
+ if "name" in dataset_entry:
601
+ gguf_writer.add_dataset_name(key, dataset_entry["name"])
602
+ if "author" in dataset_entry:
603
+ gguf_writer.add_dataset_author(key, dataset_entry["author"])
604
+ if "version" in dataset_entry:
605
+ gguf_writer.add_dataset_version(key, dataset_entry["version"])
606
+ if "organization" in dataset_entry:
607
+ gguf_writer.add_dataset_organization(key, dataset_entry["organization"])
608
+ if "description" in dataset_entry:
609
+ gguf_writer.add_dataset_description(key, dataset_entry["description"])
610
+ if "url" in dataset_entry:
611
+ gguf_writer.add_dataset_url(key, dataset_entry["url"])
612
+ if "doi" in dataset_entry:
613
+ gguf_writer.add_dataset_doi(key, dataset_entry["doi"])
614
+ if "uuid" in dataset_entry:
615
+ gguf_writer.add_dataset_uuid(key, dataset_entry["uuid"])
616
+ if "repo_url" in dataset_entry:
617
+ gguf_writer.add_dataset_repo_url(key, dataset_entry["repo_url"])
618
+
505
619
  if self.tags is not None:
506
620
  gguf_writer.add_tags(self.tags)
507
621
  if self.languages is not None:
508
622
  gguf_writer.add_languages(self.languages)
509
- if self.datasets is not None:
510
- gguf_writer.add_datasets(self.datasets)
bigdl/cpp/gguf-py/gguf/quants.py
@@ -574,6 +574,87 @@ class Q6_K(__Quant, qtype=GGMLQuantizationType.Q6_K):
         return (d * q).reshape((n_blocks, QK_K))
 
 
+class TQ1_0(__Quant, qtype=GGMLQuantizationType.TQ1_0):
+    @classmethod
+    def quantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
+        n_blocks = blocks.shape[0]
+
+        d = abs(blocks).max(axis=-1, keepdims=True)
+        with np.errstate(divide="ignore"):
+            id = np.where(d == 0, 0, 1 / d)
+        qs = np_roundf(blocks * id)
+        qs = (qs.astype(np.int8) + np.int8(1)).astype(np.uint8)
+
+        qs0, qs1, qh = qs[..., :(32 * 5)], qs[..., (32 * 5):(48 * 5)], qs[..., (48 * 5):]
+        qs0 = qs0.reshape((n_blocks, -1, 5, 32)) * np.array([81, 27, 9, 3, 1], dtype=np.uint8).reshape((1, 1, 5, 1))
+        qs0 = np.sum(qs0, axis=-2).reshape((n_blocks, -1))
+        qs1 = qs1.reshape((n_blocks, -1, 5, 16)) * np.array([81, 27, 9, 3, 1], dtype=np.uint8).reshape((1, 1, 5, 1))
+        qs1 = np.sum(qs1, axis=-2).reshape((n_blocks, -1))
+        qh = qh.reshape((n_blocks, -1, 4, 4)) * np.array([81, 27, 9, 3], dtype=np.uint8).reshape((1, 1, 4, 1))
+        qh = np.sum(qh, axis=-2).reshape((n_blocks, -1))
+        qs = np.concatenate([qs0, qs1, qh], axis=-1)
+        qs = (qs.astype(np.uint16) * 256 + (243 - 1)) // 243
+
+        qs = qs.astype(np.uint8)
+        d = d.astype(np.float16).view(np.uint8)
+
+        return np.concatenate([qs, d], axis=-1)
+
+    @classmethod
+    def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
+        n_blocks = blocks.shape[0]
+
+        qs, rest = np.hsplit(blocks, [(QK_K - 4 * QK_K // 64) // 5])
+        qh, d = np.hsplit(rest, [QK_K // 64])
+
+        d = d.view(np.float16).astype(np.float32)
+
+        qs0, qs1 = qs[..., :32], qs[..., 32:]
+        qs0 = qs0.reshape((n_blocks, -1, 1, 32)) * np.array([1, 3, 9, 27, 81], dtype=np.uint8).reshape((1, 1, 5, 1))
+        qs0 = qs0.reshape((n_blocks, -1))
+        qs1 = qs1.reshape((n_blocks, -1, 1, 16)) * np.array([1, 3, 9, 27, 81], dtype=np.uint8).reshape((1, 1, 5, 1))
+        qs1 = qs1.reshape((n_blocks, -1))
+        qh = qh.reshape((n_blocks, -1, 1, 4)) * np.array([1, 3, 9, 27], dtype=np.uint8).reshape((1, 1, 4, 1))
+        qh = qh.reshape((n_blocks, -1))
+        qs = np.concatenate([qs0, qs1, qh], axis=-1)
+        qs = ((qs.astype(np.uint16) * 3) >> 8).astype(np.int8) - np.int8(1)
+
+        return (d * qs.astype(np.float32))
+
+
+class TQ2_0(__Quant, qtype=GGMLQuantizationType.TQ2_0):
+    @classmethod
+    def quantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
+        n_blocks = blocks.shape[0]
+
+        d = abs(blocks).max(axis=-1, keepdims=True)
+        with np.errstate(divide="ignore"):
+            id = np.where(d == 0, 0, 1 / d)
+        qs = np_roundf(blocks * id)
+        qs = (qs.astype(np.int8) + np.int8(1)).astype(np.uint8)
+
+        qs = qs.reshape((n_blocks, -1, 4, 32)) << np.array([0, 2, 4, 6], dtype=np.uint8).reshape((1, 1, 4, 1))
+        qs = qs[..., 0, :] | qs[..., 1, :] | qs[..., 2, :] | qs[..., 3, :]
+        qs = qs.reshape((n_blocks, -1))
+
+        d = d.astype(np.float16).view(np.uint8)
+
+        return np.concatenate([qs, d], axis=-1)
+
+    @classmethod
+    def dequantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:
+        n_blocks = blocks.shape[0]
+
+        qs, d = np.hsplit(blocks, [QK_K // 4])
+
+        d = d.view(np.float16).astype(np.float32)
+
+        qs = qs.reshape((n_blocks, -1, 1, 32)) >> np.array([0, 2, 4, 6], dtype=np.uint8).reshape((1, 1, 4, 1))
+        qs = (qs & 0x03).reshape((n_blocks, -1)).astype(np.int8) - np.int8(1)
+
+        return (d * qs.astype(np.float32))
+
+
 class IQ2_XXS(__Quant, qtype=GGMLQuantizationType.IQ2_XXS):
     ksigns: bytes = (
         b"\x00\x81\x82\x03\x84\x05\x06\x87\x88\x09\x0a\x8b\x0c\x8d\x8e\x0f"
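
For orientation: TQ1_0 and TQ2_0 are the ternary (BitNet-style) block formats, storing weights in {-1, 0, 1} with one fp16 scale per 256-element block; TQ1_0 packs five trits per byte in base-3, while TQ2_0 spends 2 bits per weight. A round-trip sketch, assuming the bundled gguf package keeps upstream's gguf.quants.quantize/dequantize helpers:

import numpy as np
from gguf.constants import GGMLQuantizationType
from gguf.quants import dequantize, quantize

rng = np.random.default_rng(0)
data = rng.standard_normal((4, 256)).astype(np.float32)  # QK_K = 256 elements per block

packed = quantize(data, GGMLQuantizationType.TQ2_0)        # uint8 payload: 2-bit weights + fp16 scale
restored = dequantize(packed, GGMLQuantizationType.TQ2_0)  # ternary approximation of data
assert restored.shape == data.shape
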