bigdl-core-cpp 2.5.0b20240827__py3-none-win_amd64.whl → 2.6.0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl/cpp/convert_hf_to_gguf.py +1196 -147
- bigdl/cpp/convert_hf_to_gguf_update.py +69 -42
- bigdl/cpp/convert_llama_ggml_to_gguf.py +0 -4
- bigdl/cpp/convert_lora_to_gguf.py +82 -14
- bigdl/cpp/gguf-py/gguf/constants.py +645 -187
- bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
- bigdl/cpp/gguf-py/gguf/gguf_reader.py +5 -6
- bigdl/cpp/gguf-py/gguf/gguf_writer.py +92 -16
- bigdl/cpp/gguf-py/gguf/lazy.py +0 -1
- bigdl/cpp/gguf-py/gguf/metadata.py +131 -19
- bigdl/cpp/gguf-py/gguf/quants.py +81 -0
- bigdl/cpp/gguf-py/gguf/tensor_mapping.py +249 -38
- bigdl/cpp/gguf-py/gguf/utility.py +1 -1
- bigdl/cpp/gguf-py/gguf/vocab.py +24 -2
- bigdl/cpp/libs/common.lib +0 -0
- bigdl/cpp/libs/ggml-base.dll +0 -0
- bigdl/cpp/libs/ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ggml.dll +0 -0
- bigdl/cpp/libs/libc++.dll +0 -0
- bigdl/cpp/libs/llama-batched.exe +0 -0
- bigdl/cpp/libs/llama-bench.exe +0 -0
- bigdl/cpp/libs/llama-cli.exe +0 -0
- bigdl/cpp/libs/llama-embedding.exe +0 -0
- bigdl/cpp/libs/llama-gemma3-cli.exe +0 -0
- bigdl/cpp/libs/llama-gguf.exe +0 -0
- bigdl/cpp/libs/llama-llava-cli.exe +0 -0
- bigdl/cpp/libs/llama-lookup.exe +0 -0
- bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
- bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
- bigdl/cpp/libs/llama-perplexity.exe +0 -0
- bigdl/cpp/libs/llama-quantize.exe +0 -0
- bigdl/cpp/libs/llama-server.exe +0 -0
- bigdl/cpp/libs/llama-simple.exe +0 -0
- bigdl/cpp/libs/llama-speculative.exe +0 -0
- bigdl/cpp/libs/llama-tokenize.exe +0 -0
- bigdl/cpp/libs/llama.dll +0 -0
- bigdl/cpp/libs/llava_shared.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ollama-lib.exe +0 -0
- bigdl/cpp/libs/ollama.exe +0 -0
- bigdl/cpp/libs/ollama_ggml.dll +0 -0
- bigdl/cpp/libs/ollama_llama.dll +0 -0
- bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
- bigdl_core_cpp-2.6.0.data/scripts/init-ollama.bat +16 -0
- {bigdl_core_cpp-2.5.0b20240827.dist-info → bigdl_core_cpp-2.6.0.dist-info}/METADATA +9 -5
- bigdl_core_cpp-2.6.0.dist-info/RECORD +57 -0
- {bigdl_core_cpp-2.5.0b20240827.dist-info → bigdl_core_cpp-2.6.0.dist-info}/WHEEL +1 -1
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/ggml.dll +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/llama.dll +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/ggml.dll +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/llama.dll +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/ollama_llama_server.exe +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/ggml.dll +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/llama.dll +0 -0
- bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/ollama_llama_server.exe +0 -0
- bigdl_core_cpp-2.5.0b20240827.data/scripts/init-ollama.bat +0 -19
- bigdl_core_cpp-2.5.0b20240827.dist-info/RECORD +0 -54
- {bigdl_core_cpp-2.5.0b20240827.data → bigdl_core_cpp-2.6.0.data}/scripts/init-llama-cpp.bat +0 -0
- {bigdl_core_cpp-2.5.0b20240827.data → bigdl_core_cpp-2.6.0.data}/scripts/init-llama-cpp.ps1 +0 -0
- {bigdl_core_cpp-2.5.0b20240827.dist-info → bigdl_core_cpp-2.6.0.dist-info}/top_level.txt +0 -0
bigdl/cpp/convert_hf_to_gguf_update.py

@@ -8,7 +8,7 @@
 # provide the necessary information to llama.cpp via the GGUF header in order to implement
 # the same pre-tokenizer.
 #
-# ref: https://github.com/
+# ref: https://github.com/ggml-org/llama.cpp/pull/6920
 #
 # Instructions:
 #
@@ -17,7 +17,7 @@
 #
 # python3 convert_hf_to_gguf_update.py <huggingface_token>
 #
-# -
+# - The convert_hf_to_gguf.py script will have had its get_vocab_base_pre() function updated
 # - Update llama.cpp with the new pre-tokenizer if necessary
 #
 # TODO: generate tokenizer tests for llama.cpp
@@ -31,6 +31,7 @@ import re
 import requests
 import sys
 import json
+import shutil
 
 from hashlib import sha256
 from enum import IntEnum, auto
@@ -64,39 +65,50 @@ else:
 
 # TODO: add models here, base models preferred
 models = [
-    {"name": "llama-spm",
-    {"name": "llama-bpe",
-    {"name": "phi-3",
-    {"name": "deepseek-llm",
-    {"name": "deepseek-coder",
-    {"name": "falcon",
-    {"name": "bert-bge",
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "jina-
-    {"name": "
-    {"name": "
-    {"name": "jina-v2-
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
-    {
-    {
-    {"name": "
+    {"name": "llama-spm", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/meta-llama/Llama-2-7b-hf", },
+    {"name": "llama-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Meta-Llama-3-8B", },
+    {"name": "phi-3", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct", },
+    {"name": "deepseek-llm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-llm-7b-base", },
+    {"name": "deepseek-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base", },
+    {"name": "falcon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/falcon-7b", },
+    {"name": "bert-bge", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/BAAI/bge-small-en-v1.5", },
+    {"name": "falcon3", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon3-7B-Base", },
+    {"name": "bert-bge-large", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/BAAI/bge-large-zh-v1.5", },
+    {"name": "mpt", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mosaicml/mpt-7b", },
+    {"name": "starcoder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigcode/starcoder2-3b", },
+    {"name": "gpt-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/openai-community/gpt2", },
+    {"name": "stablelm2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/stabilityai/stablelm-2-zephyr-1_6b", },
+    {"name": "refact", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/smallcloudai/Refact-1_6-base", },
+    {"name": "command-r", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/CohereForAI/c4ai-command-r-v01", },
+    {"name": "qwen2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen1.5-7B", },
+    {"name": "olmo", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/allenai/OLMo-1.7-7B-hf", },
+    {"name": "dbrx", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/databricks/dbrx-base", },
+    {"name": "jina-v1-en", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-reranker-v1-tiny-en", },
+    {"name": "jina-v2-en", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-en", }, # WPM!
+    {"name": "jina-v2-es", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-es", },
+    {"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-de", },
+    {"name": "smaug-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct", },
+    {"name": "poro-chat", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Poro-34B-chat", },
+    {"name": "jina-v2-code", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-code", },
+    {"name": "viking", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Viking-7B", }, # Also used for Viking 13B and 33B
+    {"name": "gemma", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2b", },
+    {"name": "gemma-2", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2-9b", },
+    {"name": "jais", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/core42/jais-13b", },
+    {"name": "t5", "tokt": TOKENIZER_TYPE.UGM, "repo": "https://huggingface.co/google-t5/t5-small", },
+    {"name": "codeshell", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/WisdomShell/CodeShell-7B", },
+    {"name": "tekken", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistralai/Mistral-Nemo-Base-2407", },
+    {"name": "smollm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/HuggingFaceTB/SmolLM-135M", },
+    {'name': "bloom", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigscience/bloom", },
+    {'name': "gpt3-finnish", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/TurkuNLP/gpt3-finnish-small", },
+    {"name": "exaone", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", },
+    {"name": "phi-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/microsoft/phi-2", },
+    {"name": "chameleon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/facebook/chameleon-7b", },
+    {"name": "minerva-7b", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", },
+    {"name": "roberta-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sentence-transformers/stsb-roberta-base"},
+    {"name": "gigachat", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ai-sage/GigaChat-20B-A3B-instruct"},
+    {"name": "megrez", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Infinigence/Megrez-3B-Instruct"},
+    {"name": "deepseek-v3", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-V3"},
+    {"name": "deepseek-r1-qwen", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"},
 ]
 
 
@@ -125,12 +137,27 @@ def download_model(model):
     if tokt == TOKENIZER_TYPE.UGM:
         files.append("spiece.model")
 
-
-
-
-
-
-
+    if os.path.isdir(repo):
+        # If repo is a path on the file system, copy the directory
+        for file in files:
+            src_path = os.path.join(repo, file)
+            dst_path = f"models/tokenizers/{name}/{file}"
+            if os.path.isfile(dst_path):
+                logger.info(f"{name}: File {dst_path} already exists - skipping")
+                continue
+            if os.path.isfile(src_path):
+                shutil.copy2(src_path, dst_path)
+                logger.info(f"{name}: Copied {src_path} to {dst_path}")
+            else:
+                logger.warning(f"{name}: Source file {src_path} does not exist")
+    else:
+        # If repo is a URL, download the files
+        for file in files:
+            save_path = f"models/tokenizers/{name}/{file}"
+            if os.path.isfile(save_path):
+                logger.info(f"{name}: File {save_path} already exists - skipping")
+                continue
+            download_file_with_auth(f"{repo}/resolve/main/{file}", token, save_path)
 
 
 for model in models:
@@ -219,7 +246,7 @@ src_func = f"""
         logger.warning("** - the model has not been added to convert_hf_to_gguf_update.py yet")
         logger.warning("** - the pre-tokenization config has changed upstream")
         logger.warning("** Check your model files and convert_hf_to_gguf_update.py and update them accordingly.")
-        logger.warning("** ref: https://github.com/
+        logger.warning("** ref: https://github.com/ggml-org/llama.cpp/pull/6920")
        logger.warning("**")
        logger.warning(f"** chkhsh: {{chkhsh}}")
        logger.warning("**************************************************************************************")
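
The chkhsh value in the warning above is a fingerprint of pre-tokenizer behavior. A minimal sketch of how such a hash can be computed for one of the downloaded tokenizers follows; the test string and tokenizer path are placeholders, not the exact chktxt or layout the script uses.

# Sketch: fingerprint a tokenizer by hashing the token ids it produces for a fixed string.
from hashlib import sha256

from transformers import AutoTokenizer

def compute_chkhsh(tokenizer_dir: str, text: str) -> str:
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_dir)
    token_ids = tokenizer.encode(text)                  # pre-tokenizer differences show up in the ids
    return sha256(str(token_ids).encode()).hexdigest()  # stable fingerprint of that behavior

# Example (path is illustrative):
# compute_chkhsh("models/tokenizers/llama-bpe", "Hello, y'all! How are you?")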
bigdl/cpp/convert_llama_ggml_to_gguf.py

@@ -294,11 +294,7 @@ class GGMLToGGUF:
         if self.vocab_override is not None:
             vo = self.vocab_override
             logger.info('* Adding vocab item(s)')
-<<<<<<< HEAD:convert-llama-ggml-to-gguf.py
-            for (idx, (vbytes, score, ttype)) in enumerate(vo.all_tokens()):
-=======
             for (_, (vbytes, score, ttype)) in enumerate(vo.all_tokens()):
->>>>>>> 1731d42:convert_llama_ggml_to_gguf.py
                 tokens.append(vbytes)
                 scores.append(score)
                 toktypes.append(ttype)
bigdl/cpp/convert_lora_to_gguf.py

@@ -12,6 +12,7 @@ import json
 from math import prod
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Sequence, SupportsIndex, cast
+from transformers import AutoConfig
 
 import torch
 
@@ -225,12 +226,15 @@ def get_base_tensor_name(lora_tensor_name: str) -> str:
     base_name = lora_tensor_name.replace("base_model.model.", "")
     base_name = base_name.replace(".lora_A.weight", ".weight")
     base_name = base_name.replace(".lora_B.weight", ".weight")
+    # models produced by mergekit-extract-lora have token embeddings in the adapter
+    base_name = base_name.replace(".lora_embedding_A", ".weight")
+    base_name = base_name.replace(".lora_embedding_B", ".weight")
     return base_name
 
 
 def parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser(
-        description="Convert a
+        description="Convert a Hugging Face PEFT LoRA adapter to a GGUF file")
     parser.add_argument(
         "--outfile", type=Path,
         help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
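
The mapping above can be illustrated with typical PEFT and mergekit-extract-lora style tensor names; the example names below are illustrative, not taken from a specific adapter.

# Illustration of the name normalization added above.
def get_base_tensor_name(lora_tensor_name: str) -> str:
    base_name = lora_tensor_name.replace("base_model.model.", "")
    base_name = base_name.replace(".lora_A.weight", ".weight")
    base_name = base_name.replace(".lora_B.weight", ".weight")
    base_name = base_name.replace(".lora_embedding_A", ".weight")
    base_name = base_name.replace(".lora_embedding_B", ".weight")
    return base_name

print(get_base_tensor_name("base_model.model.model.layers.0.self_attn.q_proj.lora_A.weight"))
# model.layers.0.self_attn.q_proj.weight
print(get_base_tensor_name("base_model.model.model.embed_tokens.lora_embedding_B"))
# model.embed_tokens.weight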
@@ -256,17 +260,27 @@ def parse_args() -> argparse.Namespace:
         help="only print out what will be done, without writing any new files",
     )
     parser.add_argument(
-        "--base", type=Path,
-        help="directory containing base model
+        "--base", type=Path,
+        help="directory containing Hugging Face model config files (config.json, tokenizer.json) for the base model that the adapter is based on - only config is needed, actual model weights are not required. If base model is unspecified, it will be loaded from Hugging Face hub based on the adapter config",
+    )
+    parser.add_argument(
+        "--base-model-id", type=str,
+        help="the model ID of the base model, if it is not available locally or in the adapter config. If specified, it will ignore --base and load the base model config from the Hugging Face hub (Example: 'meta-llama/Llama-3.2-1B-Instruct')",
     )
     parser.add_argument(
         "lora_path", type=Path,
-        help="directory containing LoRA
+        help="directory containing Hugging Face PEFT LoRA config (adapter_model.json) and weights (adapter_model.safetensors or adapter_model.bin)",
     )
 
     return parser.parse_args()
 
 
+def load_hparams_from_hf(hf_model_id: str) -> dict[str, Any]:
+    # normally, adapter does not come with base model config, we need to load it from AutoConfig
+    config = AutoConfig.from_pretrained(hf_model_id)
+    return config.to_dict()
+
+
 if __name__ == '__main__':
     args = parse_args()
     logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
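
Since only the base model's config is needed (not its weights), the new load_hparams_from_hf() path boils down to an AutoConfig lookup. A quick sketch of what it returns; the model id is the example given in the --base-model-id help text, and gated repos may require a Hugging Face login.

# Sketch: the converter only needs config-level fields such as "architectures".
from transformers import AutoConfig

hparams = AutoConfig.from_pretrained("meta-llama/Llama-3.2-1B-Instruct").to_dict()
print(hparams["architectures"])                              # e.g. ['LlamaForCausalLM']
print(hparams["hidden_size"], hparams["num_hidden_layers"])  # config-level shape info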
@@ -281,8 +295,9 @@ if __name__ == '__main__':
 
     ftype = ftype_map[args.outtype]
 
-    dir_base_model: Path = args.base
+    dir_base_model: Path | None = args.base
     dir_lora: Path = args.lora_path
+    base_model_id: str | None = args.base_model_id
     lora_config = dir_lora / "adapter_config.json"
     input_model = dir_lora / "adapter_model.safetensors"
 
@@ -301,9 +316,32 @@ if __name__ == '__main__':
         input_model = os.path.join(dir_lora, "adapter_model.bin")
         lora_model = torch.load(input_model, map_location="cpu", weights_only=True)
 
+    # load LoRA config
+    with open(lora_config, "r") as f:
+        lparams: dict[str, Any] = json.load(f)
+
     # load base model
-
-
+    if base_model_id is not None:
+        logger.info(f"Loading base model from Hugging Face: {base_model_id}")
+        hparams = load_hparams_from_hf(base_model_id)
+    elif dir_base_model is None:
+        if "base_model_name_or_path" in lparams:
+            model_id = lparams["base_model_name_or_path"]
+            logger.info(f"Loading base model from Hugging Face: {model_id}")
+            try:
+                hparams = load_hparams_from_hf(model_id)
+            except OSError as e:
+                logger.error(f"Failed to load base model config: {e}")
+                logger.error("Please try downloading the base model and add its path to --base")
+                sys.exit(1)
+        else:
+            logger.error("'base_model_name_or_path' is not found in adapter_config.json")
+            logger.error("Base model config is required. Please download the base model and add its path to --base")
+            sys.exit(1)
+    else:
+        logger.info(f"Loading base model: {dir_base_model.name}")
+        hparams = Model.load_hparams(dir_base_model)
+
     with torch.inference_mode():
         try:
             model_class = Model.from_model_architecture(hparams["architectures"][0])
@@ -323,13 +361,19 @@ if __name__ == '__main__':
                 self.dir_model_card = dir_lora_model
                 self.lora_alpha = float(lora_alpha)
 
+            def set_vocab(self):
+                pass
+
             def set_type(self):
                 self.gguf_writer.add_type(gguf.GGUFType.ADAPTER)
                 self.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")
 
             def set_gguf_parameters(self):
                 self.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, self.lora_alpha)
-
+
+            def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
+                # Never add extra tensors (e.g. rope_freqs) for LoRA adapters
+                return ()
 
             def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
                 tensor_map: dict[str, PartialLoraTensor] = {}
@@ -338,12 +382,20 @@ if __name__ == '__main__':
                     if self.lazy:
                         tensor = LazyTorchTensor.from_eager(tensor)
                     base_name = get_base_tensor_name(name)
-
-
+                    # note: mergekit-extract-lora also adds token embeddings to the adapter
+                    is_lora_a = ".lora_A.weight" in name or ".lora_embedding_A" in name
+                    is_lora_b = ".lora_B.weight" in name or ".lora_embedding_B" in name
                     if not is_lora_a and not is_lora_b:
                         if ".base_layer.weight" in name:
                             continue
+                        # mergekit-extract-lora add these layernorm to the adapter, we need to keep them
+                        if "_layernorm" in name or ".norm" in name:
+                            yield (base_name, tensor)
+                            continue
                         logger.error(f"Unexpected name '{name}': Not a lora_A or lora_B tensor")
+                        if ".embed_tokens.weight" in name or ".lm_head.weight" in name:
+                            logger.error("Embeddings is present in the adapter. This can be due to new tokens added during fine tuning")
+                            logger.error("Please refer to https://github.com/ggml-org/llama.cpp/pull/9948")
                         sys.exit(1)
 
                     if base_name in tensor_map:
@@ -363,17 +415,32 @@ if __name__ == '__main__':
                     yield (name, cast(torch.Tensor, LoraTorchTensor(tensor.A, tensor.B)))
 
             def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
-                dest = super().modify_tensors(data_torch, name, bid)
+                dest = list(super().modify_tensors(data_torch, name, bid))
+                # some archs may have the same tensor for lm_head and output (tie word embeddings)
+                # in this case, adapters targeting lm_head will fail when using llama-export-lora
+                # therefore, we ignore them for now
+                # see: https://github.com/ggml-org/llama.cpp/issues/9065
+                if name == "lm_head.weight" and len(dest) == 0:
+                    raise ValueError("lm_head is present in adapter, but is ignored in base model")
                 for dest_name, dest_data in dest:
+                    # mergekit-extract-lora add these layernorm to the adapter
+                    if "_norm" in dest_name:
+                        assert dest_data.dim() == 1
+                        yield (dest_name, dest_data)
+                        continue
+
+                    # otherwise, we must get the lora_A and lora_B tensors
                     assert isinstance(dest_data, LoraTorchTensor)
                     lora_a, lora_b = dest_data.get_lora_A_B()
 
+                    # note: mergekit-extract-lora flip and transpose A and B
+                    # here we only need to transpose token_embd.lora_a, see llm_build_inp_embd()
+                    if "token_embd.weight" in dest_name:
+                        lora_a = lora_a.T
+
                     yield (dest_name + ".lora_a", lora_a)
                     yield (dest_name + ".lora_b", lora_b)
 
-        with open(lora_config, "r") as f:
-            lparams: dict[str, Any] = json.load(f)
-
         alpha: float = lparams["lora_alpha"]
 
         model_instance = LoraModel(
@@ -386,6 +453,7 @@ if __name__ == '__main__':
             dry_run=args.dry_run,
             dir_lora_model=dir_lora,
             lora_alpha=alpha,
+            hparams=hparams,
         )
 
         logger.info("Exporting model...")
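
For reference, each .lora_a / .lora_b pair written by the converter encodes a standard low-rank LoRA update on top of a frozen base weight. A rough sketch of that reconstruction follows; the shapes and alpha/rank scaling use the usual PEFT convention and are an illustration, not code from the converter or from llama.cpp.

# Rough illustration of the low-rank delta a (lora_a, lora_b) pair represents.
import torch

out_features, in_features, rank, alpha = 32, 16, 4, 8.0

lora_a = torch.randn(rank, in_features)        # A: (rank, in_features)
lora_b = torch.randn(out_features, rank)       # B: (out_features, rank)

delta_w = (alpha / rank) * (lora_b @ lora_a)   # update applied on top of the frozen base weight
print(delta_w.shape)                           # torch.Size([32, 16])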