bigdl-core-cpp 2.6.0b20250204__py3-none-win_amd64.whl → 2.6.0b20250206__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl/cpp/convert_hf_to_gguf.py +99 -44
- bigdl/cpp/convert_hf_to_gguf_update.py +4 -1
- bigdl/cpp/convert_lora_to_gguf.py +41 -11
- bigdl/cpp/gguf-py/gguf/constants.py +79 -18
- bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
- bigdl/cpp/gguf-py/gguf/gguf_reader.py +5 -6
- bigdl/cpp/gguf-py/gguf/gguf_writer.py +36 -12
- bigdl/cpp/gguf-py/gguf/metadata.py +131 -19
- bigdl/cpp/gguf-py/gguf/tensor_mapping.py +17 -15
- bigdl/cpp/gguf-py/gguf/vocab.py +24 -2
- bigdl/cpp/libs/common.lib +0 -0
- bigdl/cpp/libs/ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ggml.dll +0 -0
- bigdl/cpp/libs/llama-batched.exe +0 -0
- bigdl/cpp/libs/llama-bench.exe +0 -0
- bigdl/cpp/libs/llama-cli.exe +0 -0
- bigdl/cpp/libs/llama-embedding.exe +0 -0
- bigdl/cpp/libs/llama-gguf.exe +0 -0
- bigdl/cpp/libs/llama-llava-cli.exe +0 -0
- bigdl/cpp/libs/llama-lookup.exe +0 -0
- bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
- bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
- bigdl/cpp/libs/llama-perplexity.exe +0 -0
- bigdl/cpp/libs/llama-quantize.exe +0 -0
- bigdl/cpp/libs/llama-server.exe +0 -0
- bigdl/cpp/libs/llama-simple.exe +0 -0
- bigdl/cpp/libs/llama-speculative.exe +0 -0
- bigdl/cpp/libs/llama-tokenize.exe +0 -0
- bigdl/cpp/libs/llama.dll +0 -0
- bigdl/cpp/libs/llava_shared.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ollama.exe +0 -0
- bigdl/cpp/libs/ollama_ggml.dll +0 -0
- bigdl/cpp/libs/ollama_llama.dll +0 -0
- bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
- {bigdl_core_cpp-2.6.0b20250204.data → bigdl_core_cpp-2.6.0b20250206.data}/scripts/init-ollama.bat +1 -1
- {bigdl_core_cpp-2.6.0b20250204.dist-info → bigdl_core_cpp-2.6.0b20250206.dist-info}/METADATA +1 -1
- bigdl_core_cpp-2.6.0b20250206.dist-info/RECORD +54 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/ipex_llm/ollama_llama_server.exe +0 -0
- bigdl_core_cpp-2.6.0b20250204.dist-info/RECORD +0 -50
- {bigdl_core_cpp-2.6.0b20250204.data → bigdl_core_cpp-2.6.0b20250206.data}/scripts/init-llama-cpp.bat +0 -0
- {bigdl_core_cpp-2.6.0b20250204.data → bigdl_core_cpp-2.6.0b20250206.data}/scripts/init-llama-cpp.ps1 +0 -0
- {bigdl_core_cpp-2.6.0b20250204.dist-info → bigdl_core_cpp-2.6.0b20250206.dist-info}/WHEEL +0 -0
- {bigdl_core_cpp-2.6.0b20250204.dist-info → bigdl_core_cpp-2.6.0b20250206.dist-info}/top_level.txt +0 -0
bigdl/cpp/convert_hf_to_gguf.py
CHANGED
@@ -72,7 +72,8 @@ class Model:
     def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool = False,
                  use_temp_file: bool = False, eager: bool = False,
                  metadata_override: Path | None = None, model_name: str | None = None,
-                 split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False,
+                 split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False,
+                 small_first_shard: bool = False, hparams: dict[str, Any] | None = None):
         if type(self) is Model:
             raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")

@@ -87,7 +88,7 @@ class Model:
         self.is_safetensors = len(self.part_names) > 0
         if not self.is_safetensors:
             self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin")
-        self.hparams = Model.load_hparams(self.dir_model)
+        self.hparams = Model.load_hparams(self.dir_model) if hparams is None else hparams
         self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"])
         self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
         self.tensor_names = None

@@ -573,6 +574,9 @@ class Model:
         if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
             # ref: https://huggingface.co/BAAI/bge-small-en-v1.5
             res = "bert-bge"
+        if chkhsh == "8e62295832751ca1e8f92f2226f403dea30dc5165e448b5bfa05af5340c64ec7":
+            # ref: https://huggingface.co/BAAI/bge-large-zh-v1.5
+            res = "bert-bge-large"
         if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166":
             # ref: https://huggingface.co/mosaicml/mpt-7b
             res = "mpt"

@@ -654,6 +658,12 @@ class Model:
         if chkhsh == "60824e3c0d9401f89943cbb2fff727f0e2d4c545ba4df2d6e4f09a6db0f5b450":
             # ref: https://huggingface.co/facebook/chameleon-7b
             res = "chameleon"
+        if chkhsh == "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35":
+            # ref: https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0
+            res = "minerva-7b"
+        if chkhsh == "8b5a93ed704057481f240da0be7e7dca721d7f8f4755263b6807227a2cbeae65":
+            # ref: https://huggingface.co/sentence-transformers/stsb-roberta-base
+            res = "roberta-bpe"

         if res is None:
             logger.warning("\n")

@@ -1538,6 +1548,17 @@ class LlamaModel(Model):
             special_vocab._set_special_token("eot", 32010)
         special_vocab.add_to_gguf(self.gguf_writer)

+        tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
+        if tokenizer_config_file.is_file():
+            with open(tokenizer_config_file, "r", encoding="utf-8") as f:
+                tokenizer_config_json = json.load(f)
+                if "add_prefix_space" in tokenizer_config_json:
+                    self.gguf_writer.add_add_space_prefix(tokenizer_config_json["add_prefix_space"])
+
+        # Apply to granite small models only
+        if self.hparams.get("vocab_size", 32000) == 49152:
+            self.gguf_writer.add_add_bos_token(False)
+
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
         hparams = self.hparams

@@ -1554,17 +1575,6 @@ class LlamaModel(Model):
             self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
             self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])

-        tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
-        if tokenizer_config_file.is_file():
-            with open(tokenizer_config_file, "r", encoding="utf-8") as f:
-                tokenizer_config_json = json.load(f)
-                if "add_prefix_space" in tokenizer_config_json:
-                    self.gguf_writer.add_add_space_prefix(tokenizer_config_json["add_prefix_space"])
-
-        # Apply to granite small models only
-        if self.hparams.get("vocab_size", 32000) == 49152:
-            self.gguf_writer.add_add_bos_token(False)
-
     @staticmethod
     def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
         if n_head_kv is not None and n_head != n_head_kv:

@@ -1827,29 +1837,40 @@ class MiniCPMModel(Model):
     model_arch = gguf.MODEL_ARCH.MINICPM

     def set_gguf_parameters(self):
-
-
-        self.gguf_writer.
-
-        self.
-        self.gguf_writer.
-
-        self.
-        self.gguf_writer.
-
+        super().set_gguf_parameters()
+        embedding_scale = float(self.hparams["scale_emb"])
+        self.gguf_writer.add_embedding_scale(embedding_scale)
+        logger.info(f"gguf: (minicpm) embedding_scale = {embedding_scale}")
+        residual_scale = self.hparams["scale_depth"] / self.hparams["num_hidden_layers"] ** 0.5
+        self.gguf_writer.add_residual_scale(residual_scale)
+        logger.info(f"gguf: (minicpm) residual_scale = {residual_scale}")
+        logit_scale = self.hparams["hidden_size"] / self.hparams["dim_model_base"]
+        self.gguf_writer.add_logit_scale(logit_scale)
+        logger.info(f"gguf: (minicpm) logit_scale = {logit_scale}")
+        if self.hparams.get("rope_scaling") is not None:
+            if self.hparams["rope_scaling"].get("type") == "longrope":
+                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LONGROPE)
+                logger.info(f"gguf: (minicpm) rope_scaling_type = {gguf.RopeScalingType.LONGROPE}")

-    def
-        self.
+    def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
+        rope_dims = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]

-
-        if
-
+        rope_scaling = self.find_hparam(['rope_scaling'], True)
+        if rope_scaling is not None:
+            long_factors = rope_scaling.get('long_factor', None)
+            short_factors = rope_scaling.get('short_factor', None)

-
-
-
-
-
+            if long_factors is None or short_factors is None:
+                raise KeyError('Missing the required key rope_scaling.long_factor or rope_scaling_short_factor')
+
+            if len(long_factors) != len(short_factors) or len(long_factors) != rope_dims / 2:
+                raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}')
+
+            yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FACTORS_LONG), torch.tensor(long_factors, dtype=torch.float32))
+            yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT), torch.tensor(short_factors, dtype=torch.float32))
+
+    def set_vocab(self):
+        self._set_vocab_sentencepiece()

     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused

@@ -1859,9 +1880,9 @@ class MiniCPMModel(Model):

         # HF models permute some of the tensors, so we need to undo that
         if name.endswith(("q_proj.weight")):
-            data_torch =
+            data_torch = LlamaModel.permute(data_torch, n_head, n_head)
         if name.endswith(("k_proj.weight")):
-            data_torch =
+            data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)

         return [(self.map_tensor_name(name), data_torch)]

@@ -1971,6 +1992,37 @@ class Qwen2Model(Model):
         except FileNotFoundError:
             self._set_vocab_gpt2()

+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
+            if self.hparams["rope_scaling"].get("type") == "yarn":
+                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+                self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
+
+
+@Model.register("Qwen2VLForConditionalGeneration")
+class Qwen2VLModel(Model):
+    model_arch = gguf.MODEL_ARCH.QWEN2VL
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        mrope_section = self.hparams["rope_scaling"]["mrope_section"]
+        mrope_section += [0] * max(0, 4 - len(mrope_section))
+        self.gguf_writer.add_rope_dimension_sections(mrope_section)
+
+    def set_vocab(self):
+        try:
+            self._set_vocab_sentencepiece()
+        except FileNotFoundError:
+            self._set_vocab_gpt2()
+
+    def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
+        for name, data in super().get_tensors():
+            if name.startswith("visual."):
+                continue
+            yield name, data
+

 @Model.register("Qwen2MoeForCausalLM")
 class Qwen2MoeModel(Model):

@@ -2515,7 +2567,7 @@ class InternLM2Model(Model):
         return [(self.map_tensor_name(name), data_torch)]


-@Model.register("BertModel", "CamembertModel")
+@Model.register("BertModel", "CamembertModel", "RobertaModel")
 class BertModel(Model):
     model_arch = gguf.MODEL_ARCH.BERT

@@ -2556,7 +2608,8 @@ class BertModel(Model):

         # we need this to validate the size of the token_type embeddings
         # though currently we are passing all zeros to the token_type embeddings
-
+        # "Sequence A" or "Sequence B"
+        self.gguf_writer.add_token_type_count(self.hparams.get("type_vocab_size", 1))

         # convert to phantom space vocab
         def phantom(tok):

@@ -2703,7 +2756,7 @@ class XLMRobertaModel(BertModel):
         self.gguf_writer.add_token_scores(scores)
         self.gguf_writer.add_token_types(toktypes)
         self.gguf_writer.add_add_space_prefix(add_prefix)
-        self.gguf_writer.add_token_type_count(1)
+        self.gguf_writer.add_token_type_count(self.hparams.get("type_vocab_size", 1))
         self.gguf_writer.add_remove_extra_whitespaces(remove_whitespaces)
         if precompiled_charsmap:
             self.gguf_writer.add_precompiled_charsmap(precompiled_charsmap)

@@ -2864,6 +2917,9 @@ class Rwkv6Model(Model):
         self.gguf_writer.add_token_list(tokens)
         self.gguf_writer.add_token_types(toktypes)
         special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
+        special_vocab.chat_template = "rwkv-world"
+        # hack: Add '\n\n' as the EOT token to make it chat normally
+        special_vocab._set_special_token("eot", 261)
         special_vocab.add_to_gguf(self.gguf_writer)

     def set_gguf_parameters(self):

@@ -3033,6 +3089,11 @@ class OlmoModel(Model):
         return [(self.map_tensor_name(name), data_torch)]


+@Model.register("Olmo2ForCausalLM")
+class Olmo2Model(Model):
+    model_arch = gguf.MODEL_ARCH.OLMO2
+
+
 @Model.register("OlmoeForCausalLM")
 class OlmoeModel(Model):
     model_arch = gguf.MODEL_ARCH.OLMOE

@@ -3741,10 +3802,7 @@ class JaisModel(Model):

         # Embeddings scale
         self.embeddings_scale = 1.0
-        # note: For some JAIS flavors, output is tied to (same as) wte in original model
-        self.output_is_wte = False
         if 'mup_embeddings_scale' in self.hparams:
-            self.output_is_wte = True  # Hack (?)
             self.embeddings_scale = self.hparams['mup_embeddings_scale']
         elif 'embeddings_scale' in self.hparams:
             self.embeddings_scale = self.hparams['embeddings_scale']

@@ -3801,10 +3859,7 @@ class JaisModel(Model):

         if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
             tensors.append((new_name, data_torch * self.embeddings_scale))
-            if self.output_is_wte:
-                tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch * self.width_scale))
         elif new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
-            assert not self.output_is_wte
             tensors.append((new_name, data_torch * self.width_scale))
         else:
             tensors.append((new_name, data_torch))
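The new `hparams` argument on `Model.__init__` lets a caller hand the converter a pre-loaded config dict instead of having `Model.load_hparams()` read `config.json` from `dir_model`; the LoRA converter changes further down rely on this. A minimal usage sketch, assuming the script is importable as a module and using placeholder paths:

```python
# Hedged sketch: constructing a converter with a pre-loaded config dict.
# The paths are placeholders; only the hparams= keyword is taken from this diff.
import json
from pathlib import Path

import gguf
from convert_hf_to_gguf import Model

dir_model = Path("/models/my-base-model")            # placeholder directory
with open(dir_model / "config.json", encoding="utf-8") as f:
    hparams = json.load(f)                           # could also come from AutoConfig(...).to_dict()

model_class = Model.from_model_architecture(hparams["architectures"][0])
model = model_class(
    dir_model,
    gguf.LlamaFileType.MOSTLY_F16,
    dir_model / "model.gguf",
    hparams=hparams,                                 # new: skips the internal Model.load_hparams() call
)
```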
bigdl/cpp/convert_hf_to_gguf_update.py
CHANGED
@@ -17,7 +17,7 @@
 #
 # python3 convert_hf_to_gguf_update.py <huggingface_token>
 #
-# -
+# - The convert_hf_to_gguf.py script will have had its get_vocab_base_pre() function updated
 # - Update llama.cpp with the new pre-tokenizer if necessary
 #
 # TODO: generate tokenizer tests for llama.cpp

@@ -72,6 +72,7 @@ models = [
     {"name": "deepseek-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base", },
     {"name": "falcon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/falcon-7b", },
     {"name": "bert-bge", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/BAAI/bge-small-en-v1.5", },
+    {"name": "bert-bge-large", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/BAAI/bge-large-zh-v1.5", },
     {"name": "mpt", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mosaicml/mpt-7b", },
     {"name": "starcoder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigcode/starcoder2-3b", },
     {"name": "gpt-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/openai-community/gpt2", },

@@ -101,6 +102,8 @@ models = [
     {"name": "exaone", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", },
     {"name": "phi-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/microsoft/phi-2", },
     {"name": "chameleon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/facebook/chameleon-7b", },
+    {"name": "minerva-7b", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", },
+    {"name": "roberta-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sentence-transformers/stsb-roberta-base"},
 ]
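For orientation, the `chkhsh` values matched in `get_vocab_base_pre()` are checksums of how a given tokenizer encodes a fixed probe string, and the models list above is what the update script iterates over to regenerate them. A rough sketch of the idea, with an illustrative probe string rather than the script's real one:

```python
# Hedged sketch of how a pre-tokenizer checksum can be derived for one of the
# new entries. The probe text is illustrative; the update script uses its own
# fixed test string, so this hash will not match the table in get_vocab_base_pre().
import hashlib
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("BAAI/bge-large-zh-v1.5")
probe = "Hello world! 123 éà 😁"   # illustrative only
chkhsh = hashlib.sha256(str(tokenizer.encode(probe)).encode()).hexdigest()
print(chkhsh)   # compared against the chkhsh entries in convert_hf_to_gguf.py
```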
bigdl/cpp/convert_lora_to_gguf.py
CHANGED
@@ -12,6 +12,7 @@ import json
 from math import prod
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Sequence, SupportsIndex, cast
+from transformers import AutoConfig

 import torch

@@ -230,7 +231,7 @@ def get_base_tensor_name(lora_tensor_name: str) -> str:

 def parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser(
-        description="Convert a
+        description="Convert a Hugging Face PEFT LoRA adapter to a GGUF file")
     parser.add_argument(
         "--outfile", type=Path,
         help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",

@@ -256,17 +257,23 @@ def parse_args() -> argparse.Namespace:
         help="only print out what will be done, without writing any new files",
     )
     parser.add_argument(
-        "--base", type=Path,
-        help="directory containing base model
+        "--base", type=Path,
+        help="directory containing Hugging Face model config files (config.json, tokenizer.json) for the base model that the adapter is based on - only config is needed, actual model weights are not required. If base model is unspecified, it will be loaded from Hugging Face hub based on the adapter config",
     )
     parser.add_argument(
         "lora_path", type=Path,
-        help="directory containing LoRA
+        help="directory containing Hugging Face PEFT LoRA config (adapter_model.json) and weights (adapter_model.safetensors or adapter_model.bin)",
     )

     return parser.parse_args()


+def load_hparams_from_hf(hf_model_id: str) -> dict[str, Any]:
+    # normally, adapter does not come with base model config, we need to load it from AutoConfig
+    config = AutoConfig.from_pretrained(hf_model_id)
+    return config.to_dict()
+
+
 if __name__ == '__main__':
     args = parse_args()
     logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

@@ -281,7 +288,7 @@ if __name__ == '__main__':

     ftype = ftype_map[args.outtype]

-    dir_base_model: Path = args.base
+    dir_base_model: Path | None = args.base
     dir_lora: Path = args.lora_path
     lora_config = dir_lora / "adapter_config.json"
     input_model = dir_lora / "adapter_model.safetensors"

@@ -301,9 +308,29 @@ if __name__ == '__main__':
         input_model = os.path.join(dir_lora, "adapter_model.bin")
         lora_model = torch.load(input_model, map_location="cpu", weights_only=True)

+    # load LoRA config
+    with open(lora_config, "r") as f:
+        lparams: dict[str, Any] = json.load(f)
+
     # load base model
-
-
+    if dir_base_model is None:
+        if "base_model_name_or_path" in lparams:
+            model_id = lparams["base_model_name_or_path"]
+            logger.info(f"Loading base model from Hugging Face: {model_id}")
+            try:
+                hparams = load_hparams_from_hf(model_id)
+            except OSError as e:
+                logger.error(f"Failed to load base model config: {e}")
+                logger.error("Please try downloading the base model and add its path to --base")
+                sys.exit(1)
+        else:
+            logger.error("'base_model_name_or_path' is not found in adapter_config.json")
+            logger.error("Base model config is required. Please download the base model and add its path to --base")
+            sys.exit(1)
+    else:
+        logger.info(f"Loading base model: {dir_base_model.name}")
+        hparams = Model.load_hparams(dir_base_model)
+
     with torch.inference_mode():
         try:
             model_class = Model.from_model_architecture(hparams["architectures"][0])

@@ -323,13 +350,15 @@ if __name__ == '__main__':
            self.dir_model_card = dir_lora_model
            self.lora_alpha = float(lora_alpha)

+        def set_vocab(self):
+            pass
+
         def set_type(self):
             self.gguf_writer.add_type(gguf.GGUFType.ADAPTER)
             self.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")

         def set_gguf_parameters(self):
             self.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, self.lora_alpha)
-            super().set_gguf_parameters()

         def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
             # Never add extra tensors (e.g. rope_freqs) for LoRA adapters

@@ -348,6 +377,9 @@ if __name__ == '__main__':
                     if ".base_layer.weight" in name:
                         continue
                     logger.error(f"Unexpected name '{name}': Not a lora_A or lora_B tensor")
+                    if ".embed_tokens.weight" in name or ".lm_head.weight" in name:
+                        logger.error("Embeddings is present in the adapter. This can be due to new tokens added during fine tuning")
+                        logger.error("Please refer to https://github.com/ggerganov/llama.cpp/pull/9948")
                     sys.exit(1)

                 if base_name in tensor_map:

@@ -381,9 +413,6 @@ if __name__ == '__main__':
                     yield (dest_name + ".lora_a", lora_a)
                     yield (dest_name + ".lora_b", lora_b)

-    with open(lora_config, "r") as f:
-        lparams: dict[str, Any] = json.load(f)
-
     alpha: float = lparams["lora_alpha"]

     model_instance = LoraModel(

@@ -396,6 +425,7 @@ if __name__ == '__main__':
         dry_run=args.dry_run,
         dir_lora_model=dir_lora,
         lora_alpha=alpha,
+        hparams=hparams,
     )

     logger.info("Exporting model...")
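In effect, the base model's hyperparameters can now come either from a local `--base` directory or from the Hub id recorded in the adapter config. A condensed sketch of that resolution logic, with placeholder paths and without the error handling shown in the diff:

```python
# Hedged sketch of the new base-config resolution in convert_lora_to_gguf.py.
# Paths are placeholders; the diff's error handling is omitted for brevity.
import json
from pathlib import Path
from typing import Any

from transformers import AutoConfig


def resolve_base_hparams(dir_base: Path | None, dir_lora: Path) -> dict[str, Any]:
    if dir_base is not None:
        # --base given: read the local config.json, no weights needed
        with open(dir_base / "config.json", encoding="utf-8") as f:
            return json.load(f)
    # --base omitted: fall back to the base model id recorded by PEFT
    with open(dir_lora / "adapter_config.json", encoding="utf-8") as f:
        lparams = json.load(f)
    model_id = lparams["base_model_name_or_path"]
    return AutoConfig.from_pretrained(model_id).to_dict()   # downloads config only
```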
bigdl/cpp/gguf-py/gguf/constants.py
CHANGED
@@ -64,15 +64,27 @@ class Keys:
         BASE_MODEL_AUTHOR       = "general.base_model.{id}.author"
         BASE_MODEL_VERSION      = "general.base_model.{id}.version"
         BASE_MODEL_ORGANIZATION = "general.base_model.{id}.organization"
+        BASE_MODEL_DESCRIPTION  = "general.base_model.{id}.description"
         BASE_MODEL_URL          = "general.base_model.{id}.url"  # Model Website/Paper
         BASE_MODEL_DOI          = "general.base_model.{id}.doi"
         BASE_MODEL_UUID         = "general.base_model.{id}.uuid"
         BASE_MODEL_REPO_URL     = "general.base_model.{id}.repo_url"  # Model Source Repository (git/svn/etc...)

+        # Dataset Source
+        DATASET_COUNT        = "general.dataset.count"
+        DATASET_NAME         = "general.dataset.{id}.name"
+        DATASET_AUTHOR       = "general.dataset.{id}.author"
+        DATASET_VERSION      = "general.dataset.{id}.version"
+        DATASET_ORGANIZATION = "general.dataset.{id}.organization"
+        DATASET_DESCRIPTION  = "general.dataset.{id}.description"
+        DATASET_URL          = "general.dataset.{id}.url"  # Model Website/Paper
+        DATASET_DOI          = "general.dataset.{id}.doi"
+        DATASET_UUID         = "general.dataset.{id}.uuid"
+        DATASET_REPO_URL     = "general.dataset.{id}.repo_url"  # Model Source Repository (git/svn/etc...)
+
         # Array based KV stores
         TAGS      = "general.tags"
         LANGUAGES = "general.languages"
-        DATASETS  = "general.datasets"

     class LLM:
         VOCAB_SIZE = "{arch}.vocab_size"

@@ -119,6 +131,7 @@ class Keys:

     class Rope:
         DIMENSION_COUNT    = "{arch}.rope.dimension_count"
+        DIMENSION_SECTIONS = "{arch}.rope.dimension_sections"
         FREQ_BASE          = "{arch}.rope.freq_base"
         SCALING_TYPE       = "{arch}.rope.scaling.type"
         SCALING_FACTOR     = "{arch}.rope.scaling.factor"

@@ -152,6 +165,8 @@ class Keys:
         MERGES = "tokenizer.ggml.merges"
         BOS_ID = "tokenizer.ggml.bos_token_id"
         EOS_ID = "tokenizer.ggml.eos_token_id"
+        EOT_ID = "tokenizer.ggml.eot_token_id"
+        EOM_ID = "tokenizer.ggml.eom_token_id"
         UNK_ID = "tokenizer.ggml.unknown_token_id"
         SEP_ID = "tokenizer.ggml.seperator_token_id"
         PAD_ID = "tokenizer.ggml.padding_token_id"

@@ -168,11 +183,16 @@ class Keys:
         CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}"
         CHAT_TEMPLATES  = "tokenizer.chat_templates"
         # FIM/Infill special tokens constants
+        FIM_PRE_ID = "tokenizer.ggml.fim_pre_token_id"
+        FIM_SUF_ID = "tokenizer.ggml.fim_suf_token_id"
+        FIM_MID_ID = "tokenizer.ggml.fim_mid_token_id"
+        FIM_PAD_ID = "tokenizer.ggml.fim_pad_token_id"
+        FIM_REP_ID = "tokenizer.ggml.fim_rep_token_id"
+        FIM_SEP_ID = "tokenizer.ggml.fim_sep_token_id"
+        # deprecated:
         PREFIX_ID = "tokenizer.ggml.prefix_token_id"
         SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
         MIDDLE_ID = "tokenizer.ggml.middle_token_id"
-        EOT_ID = "tokenizer.ggml.eot_token_id"
-        EOM_ID = "tokenizer.ggml.eom_token_id"

     class Adapter:
         TYPE = "adapter.type"

@@ -207,6 +227,7 @@ class MODEL_ARCH(IntEnum):
     QWEN     = auto()
     QWEN2    = auto()
     QWEN2MOE = auto()
+    QWEN2VL  = auto()
     PHI2     = auto()
     PHI3     = auto()
     PLAMO    = auto()

@@ -224,6 +245,7 @@ class MODEL_ARCH(IntEnum):
     COMMAND_R = auto()
     DBRX      = auto()
     OLMO      = auto()
+    OLMO2     = auto()
     OLMOE     = auto()
     OPENELM   = auto()
     ARCTIC    = auto()

@@ -368,6 +390,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.QWEN:     "qwen",
     MODEL_ARCH.QWEN2:    "qwen2",
     MODEL_ARCH.QWEN2MOE: "qwen2moe",
+    MODEL_ARCH.QWEN2VL:  "qwen2vl",
     MODEL_ARCH.PHI2:     "phi2",
     MODEL_ARCH.PHI3:     "phi3",
     MODEL_ARCH.PLAMO:    "plamo",

@@ -385,6 +408,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.COMMAND_R: "command-r",
     MODEL_ARCH.DBRX:      "dbrx",
     MODEL_ARCH.OLMO:      "olmo",
+    MODEL_ARCH.OLMO2:     "olmo2",
     MODEL_ARCH.OLMOE:     "olmoe",
     MODEL_ARCH.OPENELM:   "openelm",
     MODEL_ARCH.ARCTIC:    "arctic",

@@ -737,6 +761,21 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_UP,
     ],
     MODEL_ARCH.QWEN2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.QWEN2VL: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.OUTPUT,

@@ -875,6 +914,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.OUTPUT,
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ROPE_FACTORS_LONG,
+        MODEL_TENSOR.ROPE_FACTORS_SHORT,
         MODEL_TENSOR.ATTN_NORM,
         MODEL_TENSOR.ATTN_Q,
         MODEL_TENSOR.ATTN_K,

@@ -1050,6 +1091,22 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.OLMO2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_POST_NORM,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.FFN_POST_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     MODEL_ARCH.OLMOE: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,

@@ -1351,9 +1408,10 @@ class TokenType(IntEnum):


 class RopeScalingType(Enum):
-    NONE
-    LINEAR
-    YARN
+    NONE     = 'none'
+    LINEAR   = 'linear'
+    YARN     = 'yarn'
+    LONGROPE = 'longrope'


 class PoolingType(IntEnum):

@@ -1392,9 +1450,6 @@ class GGMLQuantizationType(IntEnum):
     F64     = 28
     IQ1_M   = 29
     BF16    = 30
-    Q4_0_4_4 = 31
-    Q4_0_4_8 = 32
-    Q4_0_8_8 = 33
     TQ1_0   = 34
     TQ2_0   = 35

@@ -1438,9 +1493,9 @@ class LlamaFileType(IntEnum):
     MOSTLY_IQ4_XS = 30  # except 1d tensors
     MOSTLY_IQ1_M  = 31  # except 1d tensors
     MOSTLY_BF16   = 32  # except 1d tensors
-    MOSTLY_Q4_0_4_4 = 33  #
-    MOSTLY_Q4_0_4_8 = 34  #
-    MOSTLY_Q4_0_8_8 = 35  #
+    # MOSTLY_Q4_0_4_4 = 33  # removed from gguf files, use Q4_0 and runtime repack
+    # MOSTLY_Q4_0_4_8 = 34  # removed from gguf files, use Q4_0 and runtime repack
+    # MOSTLY_Q4_0_8_8 = 35  # removed from gguf files, use Q4_0 and runtime repack
     MOSTLY_TQ1_0  = 36  # except 1d tensors
     MOSTLY_TQ2_0  = 37  # except 1d tensors

@@ -1516,9 +1571,6 @@ GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
    GGMLQuantizationType.F64:   (1, 8),
    GGMLQuantizationType.IQ1_M: (256, QK_K // 8 + QK_K // 16 + QK_K // 32),
    GGMLQuantizationType.BF16:  (1, 2),
-    GGMLQuantizationType.Q4_0_4_4:(32, 2 + 16),
-    GGMLQuantizationType.Q4_0_4_8:(32, 2 + 16),
-    GGMLQuantizationType.Q4_0_8_8:(32, 2 + 16),
    GGMLQuantizationType.TQ1_0: (256, 2 + 4 * 13),
    GGMLQuantizationType.TQ2_0: (256, 2 + 64),
 }

@@ -1579,6 +1631,8 @@ KEY_TOKENIZER_SCORES = Keys.Tokenizer.SCORES
 KEY_TOKENIZER_MERGES = Keys.Tokenizer.MERGES
 KEY_TOKENIZER_BOS_ID = Keys.Tokenizer.BOS_ID
 KEY_TOKENIZER_EOS_ID = Keys.Tokenizer.EOS_ID
+KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID
+KEY_TOKENIZER_EOM_ID = Keys.Tokenizer.EOM_ID
 KEY_TOKENIZER_UNK_ID = Keys.Tokenizer.UNK_ID
 KEY_TOKENIZER_SEP_ID = Keys.Tokenizer.SEP_ID
 KEY_TOKENIZER_PAD_ID = Keys.Tokenizer.PAD_ID

@@ -1586,8 +1640,15 @@ KEY_TOKENIZER_CLS_ID = Keys.Tokenizer.CLS_ID
 KEY_TOKENIZER_MASK_ID = Keys.Tokenizer.MASK_ID
 KEY_TOKENIZER_HF_JSON = Keys.Tokenizer.HF_JSON
 KEY_TOKENIZER_RWKV    = Keys.Tokenizer.RWKV
-
+
+KEY_TOKENIZER_FIM_PRE_ID = Keys.Tokenizer.FIM_PRE_ID
+KEY_TOKENIZER_FIM_SUF_ID = Keys.Tokenizer.FIM_SUF_ID
+KEY_TOKENIZER_FIM_MID_ID = Keys.Tokenizer.FIM_MID_ID
+KEY_TOKENIZER_FIM_PAD_ID = Keys.Tokenizer.FIM_PAD_ID
+KEY_TOKENIZER_FIM_REP_ID = Keys.Tokenizer.FIM_REP_ID
+KEY_TOKENIZER_FIM_SEP_ID = Keys.Tokenizer.FIM_SEP_ID
+
+# deprecated
+KEY_TOKENIZER_PREFIX_ID = Keys.Tokenizer.PREFIX_ID
 KEY_TOKENIZER_SUFFIX_ID = Keys.Tokenizer.SUFFIX_ID
 KEY_TOKENIZER_MIDDLE_ID = Keys.Tokenizer.MIDDLE_ID
-KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID
-KEY_TOKENIZER_EOM_ID = Keys.Tokenizer.EOM_ID
bigdl/cpp/gguf-py/gguf/gguf_reader.py
CHANGED
@@ -145,11 +145,10 @@ class GGUFReader:
         count = int(count)
         itemsize = int(np.empty([], dtype = dtype).itemsize)
         end_offs = offset + itemsize * count
-
-
-
-
-        )
+        arr = self.data[offset:end_offs].view(dtype=dtype)[:count]
+        if override_order is None:
+            return arr
+        return arr.view(arr.dtype.newbyteorder(override_order))

     def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
         if field.name in self.fields:

@@ -314,4 +313,4 @@ class GGUFReader:
             data = self._get(data_offs, item_type, item_count).reshape(np_dims),
             field = field,
         ))
-        self.tensors = tensors
+        self.tensors = tensors
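The rewritten tail of `GGUFReader._get` returns a plain slice view and only re-views it with a different byte order when `override_order` is given. A small standalone sketch of what that numpy pattern does:

```python
# Hedged sketch of the numpy idiom used above: re-viewing an array with a
# different byte order reinterprets the same buffer without copying it.
import numpy as np

data = np.arange(4, dtype=np.uint32).view(np.uint8)   # stand-in for the mmap'd file bytes
arr = data[0:16].view(dtype=np.uint32)[:4]            # offset/count slice, as in _get
swapped = arr.view(arr.dtype.newbyteorder('S'))       # 'S' swaps the current byte order

assert swapped.tobytes() == arr.tobytes()             # same underlying bytes
assert swapped[1] != arr[1]                           # but interpreted differently
```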