bigdl-core-cpp 2.6.0b20250204__py3-none-win_amd64.whl → 2.6.0b20250204.post0__py3-none-win_amd64.whl
- bigdl/cpp/convert_hf_to_gguf.py +99 -44
- bigdl/cpp/convert_hf_to_gguf_update.py +4 -1
- bigdl/cpp/convert_lora_to_gguf.py +41 -11
- bigdl/cpp/gguf-py/gguf/constants.py +79 -18
- bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
- bigdl/cpp/gguf-py/gguf/gguf_reader.py +5 -6
- bigdl/cpp/gguf-py/gguf/gguf_writer.py +36 -12
- bigdl/cpp/gguf-py/gguf/metadata.py +131 -19
- bigdl/cpp/gguf-py/gguf/tensor_mapping.py +17 -15
- bigdl/cpp/gguf-py/gguf/vocab.py +24 -2
- bigdl/cpp/libs/common.lib +0 -0
- bigdl/cpp/libs/ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ggml.dll +0 -0
- bigdl/cpp/libs/llama-batched.exe +0 -0
- bigdl/cpp/libs/llama-bench.exe +0 -0
- bigdl/cpp/libs/llama-cli.exe +0 -0
- bigdl/cpp/libs/llama-embedding.exe +0 -0
- bigdl/cpp/libs/llama-gguf.exe +0 -0
- bigdl/cpp/libs/llama-llava-cli.exe +0 -0
- bigdl/cpp/libs/llama-lookup.exe +0 -0
- bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
- bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
- bigdl/cpp/libs/llama-perplexity.exe +0 -0
- bigdl/cpp/libs/llama-quantize.exe +0 -0
- bigdl/cpp/libs/llama-server.exe +0 -0
- bigdl/cpp/libs/llama-simple.exe +0 -0
- bigdl/cpp/libs/llama-speculative.exe +0 -0
- bigdl/cpp/libs/llama-tokenize.exe +0 -0
- bigdl/cpp/libs/llama.dll +0 -0
- bigdl/cpp/libs/llava_shared.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ollama.exe +0 -0
- bigdl/cpp/libs/ollama_ggml.dll +0 -0
- bigdl/cpp/libs/ollama_llama.dll +0 -0
- bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
- {bigdl_core_cpp-2.6.0b20250204.data → bigdl_core_cpp-2.6.0b20250204.post0.data}/scripts/init-ollama.bat +1 -1
- {bigdl_core_cpp-2.6.0b20250204.dist-info → bigdl_core_cpp-2.6.0b20250204.post0.dist-info}/METADATA +1 -1
- bigdl_core_cpp-2.6.0b20250204.post0.dist-info/RECORD +54 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/ipex_llm/ollama_llama_server.exe +0 -0
- bigdl_core_cpp-2.6.0b20250204.dist-info/RECORD +0 -50
- {bigdl_core_cpp-2.6.0b20250204.data → bigdl_core_cpp-2.6.0b20250204.post0.data}/scripts/init-llama-cpp.bat +0 -0
- {bigdl_core_cpp-2.6.0b20250204.data → bigdl_core_cpp-2.6.0b20250204.post0.data}/scripts/init-llama-cpp.ps1 +0 -0
- {bigdl_core_cpp-2.6.0b20250204.dist-info → bigdl_core_cpp-2.6.0b20250204.post0.dist-info}/WHEEL +0 -0
- {bigdl_core_cpp-2.6.0b20250204.dist-info → bigdl_core_cpp-2.6.0b20250204.post0.dist-info}/top_level.txt +0 -0
bigdl/cpp/convert_hf_to_gguf.py
CHANGED
@@ -72,7 +72,8 @@ class Model:
     def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool = False,
                  use_temp_file: bool = False, eager: bool = False,
                  metadata_override: Path | None = None, model_name: str | None = None,
-                 split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False,
+                 split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False,
+                 small_first_shard: bool = False, hparams: dict[str, Any] | None = None):
         if type(self) is Model:
             raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")

@@ -87,7 +88,7 @@ class Model:
         self.is_safetensors = len(self.part_names) > 0
         if not self.is_safetensors:
             self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin")
-        self.hparams = Model.load_hparams(self.dir_model)
+        self.hparams = Model.load_hparams(self.dir_model) if hparams is None else hparams
         self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"])
         self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
         self.tensor_names = None
@@ -573,6 +574,9 @@ class Model:
         if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
             # ref: https://huggingface.co/BAAI/bge-small-en-v1.5
             res = "bert-bge"
+        if chkhsh == "8e62295832751ca1e8f92f2226f403dea30dc5165e448b5bfa05af5340c64ec7":
+            # ref: https://huggingface.co/BAAI/bge-large-zh-v1.5
+            res = "bert-bge-large"
         if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166":
             # ref: https://huggingface.co/mosaicml/mpt-7b
             res = "mpt"
@@ -654,6 +658,12 @@ class Model:
         if chkhsh == "60824e3c0d9401f89943cbb2fff727f0e2d4c545ba4df2d6e4f09a6db0f5b450":
             # ref: https://huggingface.co/facebook/chameleon-7b
             res = "chameleon"
+        if chkhsh == "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35":
+            # ref: https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0
+            res = "minerva-7b"
+        if chkhsh == "8b5a93ed704057481f240da0be7e7dca721d7f8f4755263b6807227a2cbeae65":
+            # ref: https://huggingface.co/sentence-transformers/stsb-roberta-base
+            res = "roberta-bpe"

         if res is None:
             logger.warning("\n")
@@ -1538,6 +1548,17 @@ class LlamaModel(Model):
             special_vocab._set_special_token("eot", 32010)
             special_vocab.add_to_gguf(self.gguf_writer)

+        tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
+        if tokenizer_config_file.is_file():
+            with open(tokenizer_config_file, "r", encoding="utf-8") as f:
+                tokenizer_config_json = json.load(f)
+                if "add_prefix_space" in tokenizer_config_json:
+                    self.gguf_writer.add_add_space_prefix(tokenizer_config_json["add_prefix_space"])
+
+        # Apply to granite small models only
+        if self.hparams.get("vocab_size", 32000) == 49152:
+            self.gguf_writer.add_add_bos_token(False)
+
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
         hparams = self.hparams
@@ -1554,17 +1575,6 @@ class LlamaModel(Model):
             self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
             self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])

-        tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
-        if tokenizer_config_file.is_file():
-            with open(tokenizer_config_file, "r", encoding="utf-8") as f:
-                tokenizer_config_json = json.load(f)
-                if "add_prefix_space" in tokenizer_config_json:
-                    self.gguf_writer.add_add_space_prefix(tokenizer_config_json["add_prefix_space"])
-
-        # Apply to granite small models only
-        if self.hparams.get("vocab_size", 32000) == 49152:
-            self.gguf_writer.add_add_bos_token(False)
-
     @staticmethod
     def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
         if n_head_kv is not None and n_head != n_head_kv:
@@ -1827,29 +1837,40 @@ class MiniCPMModel(Model):
     model_arch = gguf.MODEL_ARCH.MINICPM

     def set_gguf_parameters(self):
-
-
-        self.gguf_writer.
-
-        self.
-        self.gguf_writer.
-
-        self.
-        self.gguf_writer.
-
+        super().set_gguf_parameters()
+        embedding_scale = float(self.hparams["scale_emb"])
+        self.gguf_writer.add_embedding_scale(embedding_scale)
+        logger.info(f"gguf: (minicpm) embedding_scale = {embedding_scale}")
+        residual_scale = self.hparams["scale_depth"] / self.hparams["num_hidden_layers"] ** 0.5
+        self.gguf_writer.add_residual_scale(residual_scale)
+        logger.info(f"gguf: (minicpm) residual_scale = {residual_scale}")
+        logit_scale = self.hparams["hidden_size"] / self.hparams["dim_model_base"]
+        self.gguf_writer.add_logit_scale(logit_scale)
+        logger.info(f"gguf: (minicpm) logit_scale = {logit_scale}")
+        if self.hparams.get("rope_scaling") is not None:
+            if self.hparams["rope_scaling"].get("type") == "longrope":
+                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LONGROPE)
+                logger.info(f"gguf: (minicpm) rope_scaling_type = {gguf.RopeScalingType.LONGROPE}")

-    def
-        self.
+    def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
+        rope_dims = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]

-
-        if
-
+        rope_scaling = self.find_hparam(['rope_scaling'], True)
+        if rope_scaling is not None:
+            long_factors = rope_scaling.get('long_factor', None)
+            short_factors = rope_scaling.get('short_factor', None)

-
-
-
-
-
+            if long_factors is None or short_factors is None:
+                raise KeyError('Missing the required key rope_scaling.long_factor or rope_scaling_short_factor')
+
+            if len(long_factors) != len(short_factors) or len(long_factors) != rope_dims / 2:
+                raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}')
+
+            yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FACTORS_LONG), torch.tensor(long_factors, dtype=torch.float32))
+            yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT), torch.tensor(short_factors, dtype=torch.float32))
+
+    def set_vocab(self):
+        self._set_vocab_sentencepiece()

     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
@@ -1859,9 +1880,9 @@ class MiniCPMModel(Model):

         # HF models permute some of the tensors, so we need to undo that
         if name.endswith(("q_proj.weight")):
-            data_torch =
+            data_torch = LlamaModel.permute(data_torch, n_head, n_head)
         if name.endswith(("k_proj.weight")):
-            data_torch =
+            data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)

         return [(self.map_tensor_name(name), data_torch)]

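Note on the MiniCPM hunks above: generate_extra_tensors() now emits explicit long/short RoPE factor tensors, and each factor list must hold exactly rope_dims / 2 entries. A minimal sketch of that length check, using hypothetical hparams values:

# Hypothetical MiniCPM-style hparams; the check mirrors the converter logic above.
hparams = {
    "hidden_size": 2304,
    "num_attention_heads": 36,
    "rope_scaling": {"long_factor": [1.0] * 32, "short_factor": [1.0] * 32},
}

rope_dims = hparams["hidden_size"] // hparams["num_attention_heads"]   # 64 dims per head
long_factors = hparams["rope_scaling"]["long_factor"]
short_factors = hparams["rope_scaling"]["short_factor"]

# one factor per rotated dimension pair -> rope_dims / 2 entries in each list
assert len(long_factors) == len(short_factors) == rope_dims // 2       # 32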
@@ -1971,6 +1992,37 @@ class Qwen2Model(Model):
         except FileNotFoundError:
             self._set_vocab_gpt2()

+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
+            if self.hparams["rope_scaling"].get("type") == "yarn":
+                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+                self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
+
+
+@Model.register("Qwen2VLForConditionalGeneration")
+class Qwen2VLModel(Model):
+    model_arch = gguf.MODEL_ARCH.QWEN2VL
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        mrope_section = self.hparams["rope_scaling"]["mrope_section"]
+        mrope_section += [0] * max(0, 4 - len(mrope_section))
+        self.gguf_writer.add_rope_dimension_sections(mrope_section)
+
+    def set_vocab(self):
+        try:
+            self._set_vocab_sentencepiece()
+        except FileNotFoundError:
+            self._set_vocab_gpt2()
+
+    def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
+        for name, data in super().get_tensors():
+            if name.startswith("visual."):
+                continue
+            yield name, data
+

 @Model.register("Qwen2MoeForCausalLM")
 class Qwen2MoeModel(Model):
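The Qwen2VLModel added above always writes a four-entry rope dimension split: the mrope_section list from the HF config is zero-padded before being stored. A small sketch with a hypothetical config value:

# Hypothetical "rope_scaling.mrope_section" value from a Qwen2-VL config.json.
mrope_section = [16, 24, 24]                            # temporal / height / width sections
mrope_section += [0] * max(0, 4 - len(mrope_section))
print(mrope_section)                                    # [16, 24, 24, 0] -- always 4 entries in the GGUF key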
@@ -2515,7 +2567,7 @@ class InternLM2Model(Model):
         return [(self.map_tensor_name(name), data_torch)]


-@Model.register("BertModel", "CamembertModel")
+@Model.register("BertModel", "CamembertModel", "RobertaModel")
 class BertModel(Model):
     model_arch = gguf.MODEL_ARCH.BERT

@@ -2556,7 +2608,8 @@ class BertModel(Model):

         # we need this to validate the size of the token_type embeddings
         # though currently we are passing all zeros to the token_type embeddings
-
+        # "Sequence A" or "Sequence B"
+        self.gguf_writer.add_token_type_count(self.hparams.get("type_vocab_size", 1))

         # convert to phantom space vocab
         def phantom(tok):
@@ -2703,7 +2756,7 @@ class XLMRobertaModel(BertModel):
         self.gguf_writer.add_token_scores(scores)
         self.gguf_writer.add_token_types(toktypes)
         self.gguf_writer.add_add_space_prefix(add_prefix)
-        self.gguf_writer.add_token_type_count(1)
+        self.gguf_writer.add_token_type_count(self.hparams.get("type_vocab_size", 1))
         self.gguf_writer.add_remove_extra_whitespaces(remove_whitespaces)
         if precompiled_charsmap:
             self.gguf_writer.add_precompiled_charsmap(precompiled_charsmap)
@@ -2864,6 +2917,9 @@ class Rwkv6Model(Model):
         self.gguf_writer.add_token_list(tokens)
         self.gguf_writer.add_token_types(toktypes)
         special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
+        special_vocab.chat_template = "rwkv-world"
+        # hack: Add '\n\n' as the EOT token to make it chat normally
+        special_vocab._set_special_token("eot", 261)
         special_vocab.add_to_gguf(self.gguf_writer)

     def set_gguf_parameters(self):
@@ -3033,6 +3089,11 @@ class OlmoModel(Model):
         return [(self.map_tensor_name(name), data_torch)]


+@Model.register("Olmo2ForCausalLM")
+class Olmo2Model(Model):
+    model_arch = gguf.MODEL_ARCH.OLMO2
+
+
 @Model.register("OlmoeForCausalLM")
 class OlmoeModel(Model):
     model_arch = gguf.MODEL_ARCH.OLMOE
@@ -3741,10 +3802,7 @@ class JaisModel(Model):

         # Embeddings scale
         self.embeddings_scale = 1.0
-        # note: For some JAIS flavors, output is tied to (same as) wte in original model
-        self.output_is_wte = False
         if 'mup_embeddings_scale' in self.hparams:
-            self.output_is_wte = True # Hack (?)
             self.embeddings_scale = self.hparams['mup_embeddings_scale']
         elif 'embeddings_scale' in self.hparams:
             self.embeddings_scale = self.hparams['embeddings_scale']
@@ -3801,10 +3859,7 @@ class JaisModel(Model):

         if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
             tensors.append((new_name, data_torch * self.embeddings_scale))
-            if self.output_is_wte:
-                tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch * self.width_scale))
         elif new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
-            assert not self.output_is_wte
             tensors.append((new_name, data_torch * self.width_scale))
         else:
             tensors.append((new_name, data_torch))
bigdl/cpp/convert_hf_to_gguf_update.py
CHANGED
@@ -17,7 +17,7 @@
 #
 # python3 convert_hf_to_gguf_update.py <huggingface_token>
 #
-# -
+# - The convert_hf_to_gguf.py script will have had its get_vocab_base_pre() function updated
 # - Update llama.cpp with the new pre-tokenizer if necessary
 #
 # TODO: generate tokenizer tests for llama.cpp
@@ -72,6 +72,7 @@ models = [
     {"name": "deepseek-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base", },
     {"name": "falcon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/falcon-7b", },
     {"name": "bert-bge", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/BAAI/bge-small-en-v1.5", },
+    {"name": "bert-bge-large", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/BAAI/bge-large-zh-v1.5", },
     {"name": "mpt", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mosaicml/mpt-7b", },
     {"name": "starcoder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigcode/starcoder2-3b", },
     {"name": "gpt-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/openai-community/gpt2", },
@@ -101,6 +102,8 @@ models = [
     {"name": "exaone", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", },
     {"name": "phi-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/microsoft/phi-2", },
     {"name": "chameleon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/facebook/chameleon-7b", },
+    {"name": "minerva-7b", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", },
+    {"name": "roberta-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sentence-transformers/stsb-roberta-base"},
 ]

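The new tokenizer entries above (bert-bge-large, minerva-7b, roberta-bpe) are matched in convert_hf_to_gguf.py by a checksum of the tokenizer's output on a fixed probe string. Roughly, as a sketch (the probe text below is a placeholder; the real script uses its own chktxt and hash constants):

from hashlib import sha256
from transformers import AutoTokenizer

chktxt = "placeholder probe text with  double  spaces, 123 numbers and mixed UNICODE"  # stand-in for the script's chktxt
tokenizer = AutoTokenizer.from_pretrained("BAAI/bge-large-zh-v1.5")
chkhsh = sha256(str(tokenizer.encode(chktxt)).encode()).hexdigest()
print(chkhsh)  # compared against the hard-coded values in get_vocab_base_pre()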
bigdl/cpp/convert_lora_to_gguf.py
CHANGED
@@ -12,6 +12,7 @@ import json
 from math import prod
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Sequence, SupportsIndex, cast
+from transformers import AutoConfig

 import torch

@@ -230,7 +231,7 @@ def get_base_tensor_name(lora_tensor_name: str) -> str:

 def parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser(
-        description="Convert a
+        description="Convert a Hugging Face PEFT LoRA adapter to a GGUF file")
     parser.add_argument(
         "--outfile", type=Path,
         help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
@@ -256,17 +257,23 @@ def parse_args() -> argparse.Namespace:
         help="only print out what will be done, without writing any new files",
     )
     parser.add_argument(
-        "--base", type=Path,
-        help="directory containing base model
+        "--base", type=Path,
+        help="directory containing Hugging Face model config files (config.json, tokenizer.json) for the base model that the adapter is based on - only config is needed, actual model weights are not required. If base model is unspecified, it will be loaded from Hugging Face hub based on the adapter config",
     )
     parser.add_argument(
         "lora_path", type=Path,
-        help="directory containing LoRA
+        help="directory containing Hugging Face PEFT LoRA config (adapter_model.json) and weights (adapter_model.safetensors or adapter_model.bin)",
     )

     return parser.parse_args()


+def load_hparams_from_hf(hf_model_id: str) -> dict[str, Any]:
+    # normally, adapter does not come with base model config, we need to load it from AutoConfig
+    config = AutoConfig.from_pretrained(hf_model_id)
+    return config.to_dict()
+
+
 if __name__ == '__main__':
     args = parse_args()
     logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
@@ -281,7 +288,7 @@ if __name__ == '__main__':

     ftype = ftype_map[args.outtype]

-    dir_base_model: Path = args.base
+    dir_base_model: Path | None = args.base
     dir_lora: Path = args.lora_path
     lora_config = dir_lora / "adapter_config.json"
     input_model = dir_lora / "adapter_model.safetensors"
@@ -301,9 +308,29 @@ if __name__ == '__main__':
         input_model = os.path.join(dir_lora, "adapter_model.bin")
         lora_model = torch.load(input_model, map_location="cpu", weights_only=True)

+    # load LoRA config
+    with open(lora_config, "r") as f:
+        lparams: dict[str, Any] = json.load(f)
+
     # load base model
-
-
+    if dir_base_model is None:
+        if "base_model_name_or_path" in lparams:
+            model_id = lparams["base_model_name_or_path"]
+            logger.info(f"Loading base model from Hugging Face: {model_id}")
+            try:
+                hparams = load_hparams_from_hf(model_id)
+            except OSError as e:
+                logger.error(f"Failed to load base model config: {e}")
+                logger.error("Please try downloading the base model and add its path to --base")
+                sys.exit(1)
+        else:
+            logger.error("'base_model_name_or_path' is not found in adapter_config.json")
+            logger.error("Base model config is required. Please download the base model and add its path to --base")
+            sys.exit(1)
+    else:
+        logger.info(f"Loading base model: {dir_base_model.name}")
+        hparams = Model.load_hparams(dir_base_model)
+
     with torch.inference_mode():
         try:
             model_class = Model.from_model_architecture(hparams["architectures"][0])
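With the block above, --base becomes optional: when it is omitted the script reads base_model_name_or_path from adapter_config.json and fetches only the base model's config (no weights) via AutoConfig, for example: python convert_lora_to_gguf.py ./my-lora-adapter --outfile adapter.gguf. A sketch of that fallback path with hypothetical names:

import json
from transformers import AutoConfig

with open("./my-lora-adapter/adapter_config.json") as f:   # hypothetical adapter directory
    lparams = json.load(f)

model_id = lparams["base_model_name_or_path"]               # e.g. "meta-llama/Meta-Llama-3-8B"
hparams = AutoConfig.from_pretrained(model_id).to_dict()    # downloads config.json only
print(hparams["architectures"][0])                          # used to pick the Model subclass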
@@ -323,13 +350,15 @@ if __name__ == '__main__':
                 self.dir_model_card = dir_lora_model
                 self.lora_alpha = float(lora_alpha)

+            def set_vocab(self):
+                pass
+
             def set_type(self):
                 self.gguf_writer.add_type(gguf.GGUFType.ADAPTER)
                 self.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")

             def set_gguf_parameters(self):
                 self.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, self.lora_alpha)
-                super().set_gguf_parameters()

             def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
                 # Never add extra tensors (e.g. rope_freqs) for LoRA adapters
@@ -348,6 +377,9 @@ if __name__ == '__main__':
                 if ".base_layer.weight" in name:
                     continue
                 logger.error(f"Unexpected name '{name}': Not a lora_A or lora_B tensor")
+                if ".embed_tokens.weight" in name or ".lm_head.weight" in name:
+                    logger.error("Embeddings is present in the adapter. This can be due to new tokens added during fine tuning")
+                    logger.error("Please refer to https://github.com/ggerganov/llama.cpp/pull/9948")
                 sys.exit(1)

             if base_name in tensor_map:
@@ -381,9 +413,6 @@ if __name__ == '__main__':
                     yield (dest_name + ".lora_a", lora_a)
                     yield (dest_name + ".lora_b", lora_b)

-    with open(lora_config, "r") as f:
-        lparams: dict[str, Any] = json.load(f)
-
     alpha: float = lparams["lora_alpha"]

     model_instance = LoraModel(
@@ -396,6 +425,7 @@ if __name__ == '__main__':
         dry_run=args.dry_run,
         dir_lora_model=dir_lora,
         lora_alpha=alpha,
+        hparams=hparams,
     )

     logger.info("Exporting model...")
bigdl/cpp/gguf-py/gguf/constants.py
CHANGED
@@ -64,15 +64,27 @@ class Keys:
         BASE_MODEL_AUTHOR = "general.base_model.{id}.author"
         BASE_MODEL_VERSION = "general.base_model.{id}.version"
         BASE_MODEL_ORGANIZATION = "general.base_model.{id}.organization"
+        BASE_MODEL_DESCRIPTION = "general.base_model.{id}.description"
         BASE_MODEL_URL = "general.base_model.{id}.url" # Model Website/Paper
         BASE_MODEL_DOI = "general.base_model.{id}.doi"
         BASE_MODEL_UUID = "general.base_model.{id}.uuid"
         BASE_MODEL_REPO_URL = "general.base_model.{id}.repo_url" # Model Source Repository (git/svn/etc...)

+        # Dataset Source
+        DATASET_COUNT = "general.dataset.count"
+        DATASET_NAME = "general.dataset.{id}.name"
+        DATASET_AUTHOR = "general.dataset.{id}.author"
+        DATASET_VERSION = "general.dataset.{id}.version"
+        DATASET_ORGANIZATION = "general.dataset.{id}.organization"
+        DATASET_DESCRIPTION = "general.dataset.{id}.description"
+        DATASET_URL = "general.dataset.{id}.url" # Model Website/Paper
+        DATASET_DOI = "general.dataset.{id}.doi"
+        DATASET_UUID = "general.dataset.{id}.uuid"
+        DATASET_REPO_URL = "general.dataset.{id}.repo_url" # Model Source Repository (git/svn/etc...)
+
         # Array based KV stores
         TAGS = "general.tags"
         LANGUAGES = "general.languages"
-        DATASETS = "general.datasets"

     class LLM:
         VOCAB_SIZE = "{arch}.vocab_size"
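The per-dataset keys above replace the single general.datasets array; each dataset is addressed by index through the {id} placeholder. A hedged sketch of writing them with the generic GGUFWriter setters (output path, arch, and values are illustrative):

import gguf

writer = gguf.GGUFWriter("metadata-demo.gguf", "llama")   # illustrative output file and architecture
writer.add_uint32(gguf.Keys.General.DATASET_COUNT, 1)
writer.add_string(gguf.Keys.General.DATASET_NAME.format(id=0), "example-corpus")
writer.add_string(gguf.Keys.General.DATASET_URL.format(id=0), "https://example.org/corpus")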
@@ -119,6 +131,7 @@ class Keys:

     class Rope:
         DIMENSION_COUNT = "{arch}.rope.dimension_count"
+        DIMENSION_SECTIONS = "{arch}.rope.dimension_sections"
         FREQ_BASE = "{arch}.rope.freq_base"
         SCALING_TYPE = "{arch}.rope.scaling.type"
         SCALING_FACTOR = "{arch}.rope.scaling.factor"
@@ -152,6 +165,8 @@ class Keys:
         MERGES = "tokenizer.ggml.merges"
         BOS_ID = "tokenizer.ggml.bos_token_id"
         EOS_ID = "tokenizer.ggml.eos_token_id"
+        EOT_ID = "tokenizer.ggml.eot_token_id"
+        EOM_ID = "tokenizer.ggml.eom_token_id"
         UNK_ID = "tokenizer.ggml.unknown_token_id"
         SEP_ID = "tokenizer.ggml.seperator_token_id"
         PAD_ID = "tokenizer.ggml.padding_token_id"
@@ -168,11 +183,16 @@ class Keys:
         CHAT_TEMPLATE_N = "tokenizer.chat_template.{name}"
         CHAT_TEMPLATES = "tokenizer.chat_templates"
         # FIM/Infill special tokens constants
+        FIM_PRE_ID = "tokenizer.ggml.fim_pre_token_id"
+        FIM_SUF_ID = "tokenizer.ggml.fim_suf_token_id"
+        FIM_MID_ID = "tokenizer.ggml.fim_mid_token_id"
+        FIM_PAD_ID = "tokenizer.ggml.fim_pad_token_id"
+        FIM_REP_ID = "tokenizer.ggml.fim_rep_token_id"
+        FIM_SEP_ID = "tokenizer.ggml.fim_sep_token_id"
+        # deprecated:
         PREFIX_ID = "tokenizer.ggml.prefix_token_id"
         SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
         MIDDLE_ID = "tokenizer.ggml.middle_token_id"
-        EOT_ID = "tokenizer.ggml.eot_token_id"
-        EOM_ID = "tokenizer.ggml.eom_token_id"

     class Adapter:
         TYPE = "adapter.type"
@@ -207,6 +227,7 @@ class MODEL_ARCH(IntEnum):
     QWEN = auto()
     QWEN2 = auto()
     QWEN2MOE = auto()
+    QWEN2VL = auto()
     PHI2 = auto()
     PHI3 = auto()
     PLAMO = auto()
@@ -224,6 +245,7 @@ class MODEL_ARCH(IntEnum):
     COMMAND_R = auto()
     DBRX = auto()
     OLMO = auto()
+    OLMO2 = auto()
     OLMOE = auto()
     OPENELM = auto()
     ARCTIC = auto()
@@ -368,6 +390,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.QWEN: "qwen",
     MODEL_ARCH.QWEN2: "qwen2",
     MODEL_ARCH.QWEN2MOE: "qwen2moe",
+    MODEL_ARCH.QWEN2VL: "qwen2vl",
     MODEL_ARCH.PHI2: "phi2",
     MODEL_ARCH.PHI3: "phi3",
     MODEL_ARCH.PLAMO: "plamo",
@@ -385,6 +408,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.COMMAND_R: "command-r",
     MODEL_ARCH.DBRX: "dbrx",
     MODEL_ARCH.OLMO: "olmo",
+    MODEL_ARCH.OLMO2: "olmo2",
     MODEL_ARCH.OLMOE: "olmoe",
     MODEL_ARCH.OPENELM: "openelm",
     MODEL_ARCH.ARCTIC: "arctic",
@@ -737,6 +761,21 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_UP,
     ],
     MODEL_ARCH.QWEN2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
+    MODEL_ARCH.QWEN2VL: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.OUTPUT,
@@ -875,6 +914,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.OUTPUT,
         MODEL_TENSOR.OUTPUT_NORM,
         MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ROPE_FACTORS_LONG,
+        MODEL_TENSOR.ROPE_FACTORS_SHORT,
         MODEL_TENSOR.ATTN_NORM,
         MODEL_TENSOR.ATTN_Q,
         MODEL_TENSOR.ATTN_K,
@@ -1050,6 +1091,22 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.OLMO2: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_POST_NORM,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.FFN_POST_NORM,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     MODEL_ARCH.OLMOE: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
@@ -1351,9 +1408,10 @@ class TokenType(IntEnum):


 class RopeScalingType(Enum):
-    NONE
-    LINEAR
-    YARN
+    NONE = 'none'
+    LINEAR = 'linear'
+    YARN = 'yarn'
+    LONGROPE = 'longrope'


 class PoolingType(IntEnum):
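With LONGROPE added, RopeScalingType now covers the rope_scaling type strings used by the configs handled above (for example MiniCPM's longrope); since the members are keyed by those strings, a converter can map a config value onto the enum directly. A small sketch with a hypothetical config fragment:

import gguf

rope_scaling = {"type": "longrope"}                        # hypothetical config.json fragment
scaling_type = gguf.RopeScalingType(rope_scaling["type"])  # lookup by value
print(scaling_type)          # RopeScalingType.LONGROPE
print(scaling_type.value)    # 'longrope' -- the string stored under {arch}.rope.scaling.type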
@@ -1392,9 +1450,6 @@ class GGMLQuantizationType(IntEnum):
     F64 = 28
     IQ1_M = 29
     BF16 = 30
-    Q4_0_4_4 = 31
-    Q4_0_4_8 = 32
-    Q4_0_8_8 = 33
     TQ1_0 = 34
     TQ2_0 = 35

@@ -1438,9 +1493,9 @@ class LlamaFileType(IntEnum):
     MOSTLY_IQ4_XS = 30 # except 1d tensors
     MOSTLY_IQ1_M = 31 # except 1d tensors
     MOSTLY_BF16 = 32 # except 1d tensors
-    MOSTLY_Q4_0_4_4 = 33 #
-    MOSTLY_Q4_0_4_8 = 34 #
-    MOSTLY_Q4_0_8_8 = 35 #
+    # MOSTLY_Q4_0_4_4 = 33 # removed from gguf files, use Q4_0 and runtime repack
+    # MOSTLY_Q4_0_4_8 = 34 # removed from gguf files, use Q4_0 and runtime repack
+    # MOSTLY_Q4_0_8_8 = 35 # removed from gguf files, use Q4_0 and runtime repack
     MOSTLY_TQ1_0 = 36 # except 1d tensors
     MOSTLY_TQ2_0 = 37 # except 1d tensors

@@ -1516,9 +1571,6 @@ GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
     GGMLQuantizationType.F64: (1, 8),
     GGMLQuantizationType.IQ1_M: (256, QK_K // 8 + QK_K // 16 + QK_K // 32),
     GGMLQuantizationType.BF16: (1, 2),
-    GGMLQuantizationType.Q4_0_4_4:(32, 2 + 16),
-    GGMLQuantizationType.Q4_0_4_8:(32, 2 + 16),
-    GGMLQuantizationType.Q4_0_8_8:(32, 2 + 16),
     GGMLQuantizationType.TQ1_0: (256, 2 + 4 * 13),
     GGMLQuantizationType.TQ2_0: (256, 2 + 64),
 }
@@ -1579,6 +1631,8 @@ KEY_TOKENIZER_SCORES = Keys.Tokenizer.SCORES
 KEY_TOKENIZER_MERGES = Keys.Tokenizer.MERGES
 KEY_TOKENIZER_BOS_ID = Keys.Tokenizer.BOS_ID
 KEY_TOKENIZER_EOS_ID = Keys.Tokenizer.EOS_ID
+KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID
+KEY_TOKENIZER_EOM_ID = Keys.Tokenizer.EOM_ID
 KEY_TOKENIZER_UNK_ID = Keys.Tokenizer.UNK_ID
 KEY_TOKENIZER_SEP_ID = Keys.Tokenizer.SEP_ID
 KEY_TOKENIZER_PAD_ID = Keys.Tokenizer.PAD_ID
@@ -1586,8 +1640,15 @@ KEY_TOKENIZER_CLS_ID = Keys.Tokenizer.CLS_ID
 KEY_TOKENIZER_MASK_ID = Keys.Tokenizer.MASK_ID
 KEY_TOKENIZER_HF_JSON = Keys.Tokenizer.HF_JSON
 KEY_TOKENIZER_RWKV = Keys.Tokenizer.RWKV
-
+
+KEY_TOKENIZER_FIM_PRE_ID = Keys.Tokenizer.FIM_PRE_ID
+KEY_TOKENIZER_FIM_SUF_ID = Keys.Tokenizer.FIM_SUF_ID
+KEY_TOKENIZER_FIM_MID_ID = Keys.Tokenizer.FIM_MID_ID
+KEY_TOKENIZER_FIM_PAD_ID = Keys.Tokenizer.FIM_PAD_ID
+KEY_TOKENIZER_FIM_REP_ID = Keys.Tokenizer.FIM_REP_ID
+KEY_TOKENIZER_FIM_SEP_ID = Keys.Tokenizer.FIM_SEP_ID
+
+# deprecated
+KEY_TOKENIZER_PREFIX_ID = Keys.Tokenizer.PREFIX_ID
 KEY_TOKENIZER_SUFFIX_ID = Keys.Tokenizer.SUFFIX_ID
 KEY_TOKENIZER_MIDDLE_ID = Keys.Tokenizer.MIDDLE_ID
-KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID
-KEY_TOKENIZER_EOM_ID = Keys.Tokenizer.EOM_ID
bigdl/cpp/gguf-py/gguf/gguf_reader.py
CHANGED
@@ -145,11 +145,10 @@ class GGUFReader:
         count = int(count)
         itemsize = int(np.empty([], dtype = dtype).itemsize)
         end_offs = offset + itemsize * count
-
-
-
-
-        )
+        arr = self.data[offset:end_offs].view(dtype=dtype)[:count]
+        if override_order is None:
+            return arr
+        return arr.view(arr.dtype.newbyteorder(override_order))

     def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
         if field.name in self.fields:
@@ -314,4 +313,4 @@ class GGUFReader:
             data = self._get(data_offs, item_type, item_count).reshape(np_dims),
             field = field,
         ))
-        self.tensors = tensors
+        self.tensors = tensors
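For context, the rewritten _get() above backs the field and tensor views exposed by GGUFReader; a minimal read-side sketch (the file path is illustrative):

from gguf import GGUFReader

reader = GGUFReader("model.gguf")             # hypothetical GGUF file
for name, field in reader.fields.items():     # metadata key/value pairs
    print(name, field.types)
for tensor in reader.tensors:                 # numpy views over the tensor data
    print(tensor.name, tensor.shape, tensor.tensor_type)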
|