bigdl-core-cpp 2.6.0b20250320__py3-none-win_amd64.whl → 2.6.0b20250322__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl/cpp/convert_hf_to_gguf.py +687 -60
- bigdl/cpp/convert_hf_to_gguf_update.py +46 -41
- bigdl/cpp/convert_lora_to_gguf.py +33 -5
- bigdl/cpp/gguf-py/gguf/constants.py +306 -123
- bigdl/cpp/gguf-py/gguf/gguf_writer.py +31 -3
- bigdl/cpp/gguf-py/gguf/tensor_mapping.py +122 -25
- bigdl/cpp/gguf-py/gguf/utility.py +1 -1
- bigdl/cpp/gguf-py/gguf/vocab.py +1 -1
- bigdl/cpp/libs/common.lib +0 -0
- bigdl/cpp/libs/ggml-base.dll +0 -0
- bigdl/cpp/libs/ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ggml.dll +0 -0
- bigdl/cpp/libs/llama-batched.exe +0 -0
- bigdl/cpp/libs/llama-bench.exe +0 -0
- bigdl/cpp/libs/llama-cli.exe +0 -0
- bigdl/cpp/libs/llama-embedding.exe +0 -0
- bigdl/cpp/libs/llama-gemma3-cli.exe +0 -0
- bigdl/cpp/libs/llama-gguf.exe +0 -0
- bigdl/cpp/libs/llama-llava-cli.exe +0 -0
- bigdl/cpp/libs/llama-lookup.exe +0 -0
- bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
- bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
- bigdl/cpp/libs/llama-perplexity.exe +0 -0
- bigdl/cpp/libs/llama-quantize.exe +0 -0
- bigdl/cpp/libs/llama-server.exe +0 -0
- bigdl/cpp/libs/llama-simple.exe +0 -0
- bigdl/cpp/libs/llama-speculative.exe +0 -0
- bigdl/cpp/libs/llama-tokenize.exe +0 -0
- bigdl/cpp/libs/llama.dll +0 -0
- bigdl/cpp/libs/llava_shared.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
- bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
- bigdl/cpp/libs/ollama-lib.exe +0 -0
- bigdl/cpp/libs/ollama.exe +0 -0
- bigdl/cpp/libs/ollama_ggml.dll +0 -0
- bigdl/cpp/libs/ollama_llama.dll +0 -0
- bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
- {bigdl_core_cpp-2.6.0b20250320.dist-info → bigdl_core_cpp-2.6.0b20250322.dist-info}/METADATA +1 -1
- bigdl_core_cpp-2.6.0b20250322.dist-info/RECORD +57 -0
- {bigdl_core_cpp-2.6.0b20250320.dist-info → bigdl_core_cpp-2.6.0b20250322.dist-info}/WHEEL +1 -1
- bigdl_core_cpp-2.6.0b20250320.dist-info/RECORD +0 -57
- {bigdl_core_cpp-2.6.0b20250320.data → bigdl_core_cpp-2.6.0b20250322.data}/scripts/init-llama-cpp.bat +0 -0
- {bigdl_core_cpp-2.6.0b20250320.data → bigdl_core_cpp-2.6.0b20250322.data}/scripts/init-llama-cpp.ps1 +0 -0
- {bigdl_core_cpp-2.6.0b20250320.data → bigdl_core_cpp-2.6.0b20250322.data}/scripts/init-ollama.bat +0 -0
- {bigdl_core_cpp-2.6.0b20250320.dist-info → bigdl_core_cpp-2.6.0b20250322.dist-info}/top_level.txt +0 -0
--- bigdl/cpp/convert_hf_to_gguf_update.py (2.6.0b20250320)
+++ bigdl/cpp/convert_hf_to_gguf_update.py (2.6.0b20250322)
@@ -8,7 +8,7 @@
 # provide the necessary information to llama.cpp via the GGUF header in order to implement
 # the same pre-tokenizer.
 #
-# ref: https://github.com/
+# ref: https://github.com/ggml-org/llama.cpp/pull/6920
 #
 # Instructions:
 #
@@ -65,45 +65,50 @@ else:
 
 # TODO: add models here, base models preferred
 models = [
-    {"name": "llama-spm",
-    {"name": "llama-bpe",
-    {"name": "phi-3",
-    {"name": "deepseek-llm",
-    {"name": "deepseek-coder",
-    {"name": "falcon",
-    {"name": "bert-bge",
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "jina-
-    {"name": "jina-v2-
-    {"name": "jina-v2-
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "gemma
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
-    {
-    {'name': "
-    {
-    {"name": "
-    {"name": "
-    {"name": "
-    {"name": "
+    {"name": "llama-spm", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/meta-llama/Llama-2-7b-hf", },
+    {"name": "llama-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Meta-Llama-3-8B", },
+    {"name": "phi-3", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct", },
+    {"name": "deepseek-llm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-llm-7b-base", },
+    {"name": "deepseek-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base", },
+    {"name": "falcon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/falcon-7b", },
+    {"name": "bert-bge", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/BAAI/bge-small-en-v1.5", },
+    {"name": "falcon3", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon3-7B-Base", },
+    {"name": "bert-bge-large", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/BAAI/bge-large-zh-v1.5", },
+    {"name": "mpt", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mosaicml/mpt-7b", },
+    {"name": "starcoder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigcode/starcoder2-3b", },
+    {"name": "gpt-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/openai-community/gpt2", },
+    {"name": "stablelm2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/stabilityai/stablelm-2-zephyr-1_6b", },
+    {"name": "refact", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/smallcloudai/Refact-1_6-base", },
+    {"name": "command-r", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/CohereForAI/c4ai-command-r-v01", },
+    {"name": "qwen2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen1.5-7B", },
+    {"name": "olmo", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/allenai/OLMo-1.7-7B-hf", },
+    {"name": "dbrx", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/databricks/dbrx-base", },
+    {"name": "jina-v1-en", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-reranker-v1-tiny-en", },
+    {"name": "jina-v2-en", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-en", }, # WPM!
+    {"name": "jina-v2-es", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-es", },
+    {"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-de", },
+    {"name": "smaug-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct", },
+    {"name": "poro-chat", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Poro-34B-chat", },
+    {"name": "jina-v2-code", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-code", },
+    {"name": "viking", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Viking-7B", }, # Also used for Viking 13B and 33B
+    {"name": "gemma", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2b", },
+    {"name": "gemma-2", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2-9b", },
+    {"name": "jais", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/core42/jais-13b", },
+    {"name": "t5", "tokt": TOKENIZER_TYPE.UGM, "repo": "https://huggingface.co/google-t5/t5-small", },
+    {"name": "codeshell", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/WisdomShell/CodeShell-7B", },
+    {"name": "tekken", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistralai/Mistral-Nemo-Base-2407", },
+    {"name": "smollm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/HuggingFaceTB/SmolLM-135M", },
+    {'name': "bloom", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigscience/bloom", },
+    {'name': "gpt3-finnish", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/TurkuNLP/gpt3-finnish-small", },
+    {"name": "exaone", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", },
+    {"name": "phi-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/microsoft/phi-2", },
+    {"name": "chameleon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/facebook/chameleon-7b", },
+    {"name": "minerva-7b", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", },
+    {"name": "roberta-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sentence-transformers/stsb-roberta-base"},
+    {"name": "gigachat", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ai-sage/GigaChat-20B-A3B-instruct"},
+    {"name": "megrez", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Infinigence/Megrez-3B-Instruct"},
+    {"name": "deepseek-v3", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-V3"},
+    {"name": "deepseek-r1-qwen", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"},
 ]
 
 
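Each entry above pairs a tokenizer name with its tokenizer type and source repository; convert_hf_to_gguf_update.py downloads the tokenizer for every entry and records a hash ("chkhsh", mentioned in the next hunk) that convert_hf_to_gguf.py later uses to recognize the pre-tokenizer. A minimal sketch of that idea, assuming the transformers library is available; the probe text and exact hashing details are illustrative rather than copied from the script:

```python
# Illustrative sketch only: hash the token IDs produced for a fixed probe string,
# so a model's pre-tokenizer can be recognized again at conversion time.
from hashlib import sha256
from transformers import AutoTokenizer  # assumed dependency of the update script

def compute_chkhsh(model_id_or_dir: str, probe_text: str) -> str:
    tokenizer = AutoTokenizer.from_pretrained(model_id_or_dir)
    token_ids = tokenizer.encode(probe_text)
    # hashing the stringified IDs keeps the fingerprint small and order-sensitive
    return sha256(str(token_ids).encode()).hexdigest()

# hypothetical usage: compute_chkhsh("openai-community/gpt2", "Hello, world!")
```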
@@ -241,7 +246,7 @@ src_func = f"""
 logger.warning("** - the model has not been added to convert_hf_to_gguf_update.py yet")
 logger.warning("** - the pre-tokenization config has changed upstream")
 logger.warning("** Check your model files and convert_hf_to_gguf_update.py and update them accordingly.")
-logger.warning("** ref: https://github.com/
+logger.warning("** ref: https://github.com/ggml-org/llama.cpp/pull/6920")
 logger.warning("**")
 logger.warning(f"** chkhsh: {{chkhsh}}")
 logger.warning("**************************************************************************************")
--- bigdl/cpp/convert_lora_to_gguf.py (2.6.0b20250320)
+++ bigdl/cpp/convert_lora_to_gguf.py (2.6.0b20250322)
@@ -226,6 +226,9 @@ def get_base_tensor_name(lora_tensor_name: str) -> str:
     base_name = lora_tensor_name.replace("base_model.model.", "")
     base_name = base_name.replace(".lora_A.weight", ".weight")
     base_name = base_name.replace(".lora_B.weight", ".weight")
+    # models produced by mergekit-extract-lora have token embeddings in the adapter
+    base_name = base_name.replace(".lora_embedding_A", ".weight")
+    base_name = base_name.replace(".lora_embedding_B", ".weight")
     return base_name
 
 
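For illustration, the effect of the two new replacements: embedding tensors emitted by mergekit-extract-lora now map to the same base name as an ordinary LoRA projection (the example tensor names below are typical PEFT names, not taken from this diff):

```python
# Same replacement chain as in the hunk above, applied to example tensor names.
def get_base_tensor_name(lora_tensor_name: str) -> str:
    base_name = lora_tensor_name.replace("base_model.model.", "")
    base_name = base_name.replace(".lora_A.weight", ".weight")
    base_name = base_name.replace(".lora_B.weight", ".weight")
    base_name = base_name.replace(".lora_embedding_A", ".weight")
    base_name = base_name.replace(".lora_embedding_B", ".weight")
    return base_name

# an ordinary LoRA projection tensor:
assert get_base_tensor_name("base_model.model.model.layers.0.self_attn.q_proj.lora_A.weight") == "model.layers.0.self_attn.q_proj.weight"
# a mergekit-extract-lora token-embedding tensor now resolves the same way:
assert get_base_tensor_name("base_model.model.model.embed_tokens.lora_embedding_A") == "model.embed_tokens.weight"
```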
@@ -260,6 +263,10 @@ def parse_args() -> argparse.Namespace:
         "--base", type=Path,
         help="directory containing Hugging Face model config files (config.json, tokenizer.json) for the base model that the adapter is based on - only config is needed, actual model weights are not required. If base model is unspecified, it will be loaded from Hugging Face hub based on the adapter config",
     )
+    parser.add_argument(
+        "--base-model-id", type=str,
+        help="the model ID of the base model, if it is not available locally or in the adapter config. If specified, it will ignore --base and load the base model config from the Hugging Face hub (Example: 'meta-llama/Llama-3.2-1B-Instruct')",
+    )
     parser.add_argument(
         "lora_path", type=Path,
         help="directory containing Hugging Face PEFT LoRA config (adapter_model.json) and weights (adapter_model.safetensors or adapter_model.bin)",
@@ -290,6 +297,7 @@ if __name__ == '__main__':
 
     dir_base_model: Path | None = args.base
     dir_lora: Path = args.lora_path
+    base_model_id: str | None = args.base_model_id
     lora_config = dir_lora / "adapter_config.json"
     input_model = dir_lora / "adapter_model.safetensors"
 
@@ -313,7 +321,10 @@ if __name__ == '__main__':
         lparams: dict[str, Any] = json.load(f)
 
     # load base model
-    if
+    if base_model_id is not None:
+        logger.info(f"Loading base model from Hugging Face: {base_model_id}")
+        hparams = load_hparams_from_hf(base_model_id)
+    elif dir_base_model is None:
         if "base_model_name_or_path" in lparams:
             model_id = lparams["base_model_name_or_path"]
             logger.info(f"Loading base model from Hugging Face: {model_id}")
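Together with the new --base-model-id flag, convert_lora_to_gguf.py now resolves the base model configuration in a fixed order: an explicit --base-model-id first, then the ID recorded in adapter_config.json when no --base directory was given, otherwise the local --base directory. A condensed, self-contained sketch of that order; the loader callbacks stand in for the script's helpers (load_hparams_from_hf appears in the hunk, the local loader is hypothetical):

```python
# Condensed sketch of the resolution order shown above; branch bodies are illustrative.
from typing import Any, Callable, Optional

def resolve_base_hparams(
    base_model_id: Optional[str],
    dir_base_model: Optional[str],
    lparams: dict[str, Any],
    load_from_hub: Callable[[str], dict],  # e.g. load_hparams_from_hf in the script
    load_from_dir: Callable[[str], dict],  # hypothetical local config.json loader
) -> dict:
    if base_model_id is not None:
        # 1. an explicit --base-model-id always wins and is fetched from the Hugging Face hub
        return load_from_hub(base_model_id)
    if dir_base_model is None:
        # 2. no --base directory: fall back to the ID recorded in adapter_config.json
        return load_from_hub(lparams["base_model_name_or_path"])
    # 3. otherwise read the config from the local --base directory
    return load_from_dir(dir_base_model)
```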
@@ -371,15 +382,20 @@ if __name__ == '__main__':
     if self.lazy:
         tensor = LazyTorchTensor.from_eager(tensor)
     base_name = get_base_tensor_name(name)
-
-
+    # note: mergekit-extract-lora also adds token embeddings to the adapter
+    is_lora_a = ".lora_A.weight" in name or ".lora_embedding_A" in name
+    is_lora_b = ".lora_B.weight" in name or ".lora_embedding_B" in name
     if not is_lora_a and not is_lora_b:
         if ".base_layer.weight" in name:
             continue
+        # mergekit-extract-lora add these layernorm to the adapter, we need to keep them
+        if "_layernorm" in name or ".norm" in name:
+            yield (base_name, tensor)
+            continue
         logger.error(f"Unexpected name '{name}': Not a lora_A or lora_B tensor")
         if ".embed_tokens.weight" in name or ".lm_head.weight" in name:
             logger.error("Embeddings is present in the adapter. This can be due to new tokens added during fine tuning")
-            logger.error("Please refer to https://github.com/
+            logger.error("Please refer to https://github.com/ggml-org/llama.cpp/pull/9948")
         sys.exit(1)
 
     if base_name in tensor_map:
@@ -403,13 +419,25 @@ if __name__ == '__main__':
     # some archs may have the same tensor for lm_head and output (tie word embeddings)
     # in this case, adapters targeting lm_head will fail when using llama-export-lora
     # therefore, we ignore them for now
-    # see: https://github.com/
+    # see: https://github.com/ggml-org/llama.cpp/issues/9065
     if name == "lm_head.weight" and len(dest) == 0:
         raise ValueError("lm_head is present in adapter, but is ignored in base model")
     for dest_name, dest_data in dest:
+        # mergekit-extract-lora add these layernorm to the adapter
+        if "_norm" in dest_name:
+            assert dest_data.dim() == 1
+            yield (dest_name, dest_data)
+            continue
+
+        # otherwise, we must get the lora_A and lora_B tensors
         assert isinstance(dest_data, LoraTorchTensor)
         lora_a, lora_b = dest_data.get_lora_A_B()
 
+        # note: mergekit-extract-lora flip and transpose A and B
+        # here we only need to transpose token_embd.lora_a, see llm_build_inp_embd()
+        if "token_embd.weight" in dest_name:
+            lora_a = lora_a.T
+
         yield (dest_name + ".lora_a", lora_a)
         yield (dest_name + ".lora_b", lora_b)
 
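The transpose in the last hunk is easier to follow with the usual LoRA shapes in mind: the exported pair reconstructs a weight delta as lora_b @ lora_a, so a factor that an extractor stored transposed (as the comment above notes mergekit-extract-lora does for token embeddings) has to be flipped back before writing. A toy shape check, assuming PyTorch; the dimensions are arbitrary:

```python
# Toy shape check only; not part of the converter.
import torch

rank, n_embd, n_vocab = 8, 4096, 32000
lora_a = torch.zeros(rank, n_embd)    # written to GGUF as "<tensor>.lora_a"
lora_b = torch.zeros(n_vocab, rank)   # written to GGUF as "<tensor>.lora_b"
delta_w = lora_b @ lora_a             # (n_vocab, n_embd), same shape as the base weight

# Had the A factor been stored as (n_embd, rank) instead, transposing it (lora_a.T)
# restores the (rank, n_embd) layout that the reconstruction above expects.
print(delta_w.shape)  # torch.Size([32000, 4096])
```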