bigdl-core-cpp 2.5.0b20240827__py3-none-win_amd64.whl → 2.6.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. bigdl/cpp/convert_hf_to_gguf.py +1196 -147
  2. bigdl/cpp/convert_hf_to_gguf_update.py +69 -42
  3. bigdl/cpp/convert_llama_ggml_to_gguf.py +0 -4
  4. bigdl/cpp/convert_lora_to_gguf.py +82 -14
  5. bigdl/cpp/gguf-py/gguf/constants.py +645 -187
  6. bigdl/cpp/gguf-py/gguf/gguf.py +1 -1
  7. bigdl/cpp/gguf-py/gguf/gguf_reader.py +5 -6
  8. bigdl/cpp/gguf-py/gguf/gguf_writer.py +92 -16
  9. bigdl/cpp/gguf-py/gguf/lazy.py +0 -1
  10. bigdl/cpp/gguf-py/gguf/metadata.py +131 -19
  11. bigdl/cpp/gguf-py/gguf/quants.py +81 -0
  12. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +249 -38
  13. bigdl/cpp/gguf-py/gguf/utility.py +1 -1
  14. bigdl/cpp/gguf-py/gguf/vocab.py +24 -2
  15. bigdl/cpp/libs/common.lib +0 -0
  16. bigdl/cpp/libs/ggml-base.dll +0 -0
  17. bigdl/cpp/libs/ggml-cpu.dll +0 -0
  18. bigdl/cpp/libs/ggml-sycl.dll +0 -0
  19. bigdl/cpp/libs/ggml.dll +0 -0
  20. bigdl/cpp/libs/libc++.dll +0 -0
  21. bigdl/cpp/libs/llama-batched.exe +0 -0
  22. bigdl/cpp/libs/llama-bench.exe +0 -0
  23. bigdl/cpp/libs/llama-cli.exe +0 -0
  24. bigdl/cpp/libs/llama-embedding.exe +0 -0
  25. bigdl/cpp/libs/llama-gemma3-cli.exe +0 -0
  26. bigdl/cpp/libs/llama-gguf.exe +0 -0
  27. bigdl/cpp/libs/llama-llava-cli.exe +0 -0
  28. bigdl/cpp/libs/llama-lookup.exe +0 -0
  29. bigdl/cpp/libs/llama-ls-sycl-device.exe +0 -0
  30. bigdl/cpp/libs/llama-minicpmv-cli.exe +0 -0
  31. bigdl/cpp/libs/llama-perplexity.exe +0 -0
  32. bigdl/cpp/libs/llama-quantize.exe +0 -0
  33. bigdl/cpp/libs/llama-server.exe +0 -0
  34. bigdl/cpp/libs/llama-simple.exe +0 -0
  35. bigdl/cpp/libs/llama-speculative.exe +0 -0
  36. bigdl/cpp/libs/llama-tokenize.exe +0 -0
  37. bigdl/cpp/libs/llama.dll +0 -0
  38. bigdl/cpp/libs/llava_shared.dll +0 -0
  39. bigdl/cpp/libs/ollama-ggml-base.dll +0 -0
  40. bigdl/cpp/libs/ollama-ggml-cpu.dll +0 -0
  41. bigdl/cpp/libs/ollama-ggml-sycl.dll +0 -0
  42. bigdl/cpp/libs/ollama-lib.exe +0 -0
  43. bigdl/cpp/libs/ollama.exe +0 -0
  44. bigdl/cpp/libs/ollama_ggml.dll +0 -0
  45. bigdl/cpp/libs/ollama_llama.dll +0 -0
  46. bigdl/cpp/libs/ollama_llava_shared.dll +0 -0
  47. bigdl_core_cpp-2.6.0.data/scripts/init-ollama.bat +16 -0
  48. {bigdl_core_cpp-2.5.0b20240827.dist-info → bigdl_core_cpp-2.6.0.dist-info}/METADATA +9 -5
  49. bigdl_core_cpp-2.6.0.dist-info/RECORD +57 -0
  50. {bigdl_core_cpp-2.5.0b20240827.dist-info → bigdl_core_cpp-2.6.0.dist-info}/WHEEL +1 -1
  51. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/ggml.dll +0 -0
  52. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/llama.dll +0 -0
  53. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu/ollama_llama_server.exe +0 -0
  54. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/ggml.dll +0 -0
  55. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/llama.dll +0 -0
  56. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx/ollama_llama_server.exe +0 -0
  57. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/ggml.dll +0 -0
  58. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/llama.dll +0 -0
  59. bigdl/cpp/libs/dist/windows-amd64/lib/ollama/runners/cpu_avx2/ollama_llama_server.exe +0 -0
  60. bigdl_core_cpp-2.5.0b20240827.data/scripts/init-ollama.bat +0 -19
  61. bigdl_core_cpp-2.5.0b20240827.dist-info/RECORD +0 -54
  62. {bigdl_core_cpp-2.5.0b20240827.data → bigdl_core_cpp-2.6.0.data}/scripts/init-llama-cpp.bat +0 -0
  63. {bigdl_core_cpp-2.5.0b20240827.data → bigdl_core_cpp-2.6.0.data}/scripts/init-llama-cpp.ps1 +0 -0
  64. {bigdl_core_cpp-2.5.0b20240827.dist-info → bigdl_core_cpp-2.6.0.dist-info}/top_level.txt +0 -0
@@ -8,7 +8,7 @@
 # provide the necessary information to llama.cpp via the GGUF header in order to implement
 # the same pre-tokenizer.
 #
-# ref: https://github.com/ggerganov/llama.cpp/pull/6920
+# ref: https://github.com/ggml-org/llama.cpp/pull/6920
 #
 # Instructions:
 #
@@ -17,7 +17,7 @@
 #
 # python3 convert_hf_to_gguf_update.py <huggingface_token>
 #
-# - Copy-paste the generated get_vocab_base_pre() function into convert_hf_to_gguf.py
+# - The convert_hf_to_gguf.py script will have had its get_vocab_base_pre() function updated
 # - Update llama.cpp with the new pre-tokenizer if necessary
 #
 # TODO: generate tokenizer tests for llama.cpp
@@ -31,6 +31,7 @@ import re
 import requests
 import sys
 import json
+import shutil

 from hashlib import sha256
 from enum import IntEnum, auto
@@ -64,39 +65,50 @@ else:

 # TODO: add models here, base models preferred
 models = [
-    {"name": "llama-spm", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/meta-llama/Llama-2-7b-hf", },
-    {"name": "llama-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Meta-Llama-3-8B", },
-    {"name": "phi-3", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct", },
-    {"name": "deepseek-llm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-llm-7b-base", },
-    {"name": "deepseek-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base", },
-    {"name": "falcon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/falcon-7b", },
-    {"name": "bert-bge", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/BAAI/bge-small-en-v1.5", },
-    {"name": "mpt", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mosaicml/mpt-7b", },
-    {"name": "starcoder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigcode/starcoder2-3b", },
-    {"name": "gpt-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/openai-community/gpt2", },
-    {"name": "stablelm2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/stabilityai/stablelm-2-zephyr-1_6b", },
-    {"name": "refact", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/smallcloudai/Refact-1_6-base", },
-    {"name": "command-r", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/CohereForAI/c4ai-command-r-v01", },
-    {"name": "qwen2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen1.5-7B", },
-    {"name": "olmo", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/allenai/OLMo-1.7-7B-hf", },
-    {"name": "dbrx", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/databricks/dbrx-base", },
-    {"name": "jina-v2-en", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-en", }, # WPM!
-    {"name": "jina-v2-es", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-es", },
-    {"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-de", },
-    {"name": "smaug-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct", },
-    {"name": "poro-chat", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Poro-34B-chat", },
-    {"name": "jina-v2-code", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-code", },
-    {"name": "viking", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Viking-7B", }, # Also used for Viking 13B and 33B
-    {"name": "gemma", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2b", },
-    {"name": "gemma-2", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2-9b", },
-    {"name": "jais", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/core42/jais-13b", },
-    {"name": "t5", "tokt": TOKENIZER_TYPE.UGM, "repo": "https://huggingface.co/google-t5/t5-small", },
-    {"name": "codeshell", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/WisdomShell/CodeShell-7B", },
-    {"name": "tekken", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistralai/Mistral-Nemo-Base-2407", },
-    {"name": "smollm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/HuggingFaceTB/SmolLM-135M", },
-    {'name': "bloom", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigscience/bloom", },
-    {'name': "gpt3-finnish", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/TurkuNLP/gpt3-finnish-small", },
-    {"name": "exaone", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", },
+    {"name": "llama-spm", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/meta-llama/Llama-2-7b-hf", },
+    {"name": "llama-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Meta-Llama-3-8B", },
+    {"name": "phi-3", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct", },
+    {"name": "deepseek-llm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-llm-7b-base", },
+    {"name": "deepseek-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base", },
+    {"name": "falcon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/falcon-7b", },
+    {"name": "bert-bge", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/BAAI/bge-small-en-v1.5", },
+    {"name": "falcon3", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon3-7B-Base", },
+    {"name": "bert-bge-large", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/BAAI/bge-large-zh-v1.5", },
+    {"name": "mpt", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mosaicml/mpt-7b", },
+    {"name": "starcoder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigcode/starcoder2-3b", },
+    {"name": "gpt-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/openai-community/gpt2", },
+    {"name": "stablelm2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/stabilityai/stablelm-2-zephyr-1_6b", },
+    {"name": "refact", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/smallcloudai/Refact-1_6-base", },
+    {"name": "command-r", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/CohereForAI/c4ai-command-r-v01", },
+    {"name": "qwen2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen1.5-7B", },
+    {"name": "olmo", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/allenai/OLMo-1.7-7B-hf", },
+    {"name": "dbrx", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/databricks/dbrx-base", },
+    {"name": "jina-v1-en", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-reranker-v1-tiny-en", },
+    {"name": "jina-v2-en", "tokt": TOKENIZER_TYPE.WPM, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-en", }, # WPM!
+    {"name": "jina-v2-es", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-es", },
+    {"name": "jina-v2-de", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-de", },
+    {"name": "smaug-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct", },
+    {"name": "poro-chat", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Poro-34B-chat", },
+    {"name": "jina-v2-code", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-code", },
+    {"name": "viking", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Viking-7B", }, # Also used for Viking 13B and 33B
+    {"name": "gemma", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2b", },
+    {"name": "gemma-2", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2-9b", },
+    {"name": "jais", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/core42/jais-13b", },
+    {"name": "t5", "tokt": TOKENIZER_TYPE.UGM, "repo": "https://huggingface.co/google-t5/t5-small", },
+    {"name": "codeshell", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/WisdomShell/CodeShell-7B", },
+    {"name": "tekken", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistralai/Mistral-Nemo-Base-2407", },
+    {"name": "smollm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/HuggingFaceTB/SmolLM-135M", },
+    {'name': "bloom", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/bigscience/bloom", },
+    {'name': "gpt3-finnish", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/TurkuNLP/gpt3-finnish-small", },
+    {"name": "exaone", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", },
+    {"name": "phi-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/microsoft/phi-2", },
+    {"name": "chameleon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/facebook/chameleon-7b", },
+    {"name": "minerva-7b", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", },
+    {"name": "roberta-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sentence-transformers/stsb-roberta-base"},
+    {"name": "gigachat", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ai-sage/GigaChat-20B-A3B-instruct"},
+    {"name": "megrez", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Infinigence/Megrez-3B-Instruct"},
+    {"name": "deepseek-v3", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-V3"},
+    {"name": "deepseek-r1-qwen", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"},
 ]

@@ -125,12 +137,27 @@ def download_model(model):
     if tokt == TOKENIZER_TYPE.UGM:
         files.append("spiece.model")

-    for file in files:
-        save_path = f"models/tokenizers/{name}/{file}"
-        if os.path.isfile(save_path):
-            logger.info(f"{name}: File {save_path} already exists - skipping")
-            continue
-        download_file_with_auth(f"{repo}/resolve/main/{file}", token, save_path)
+    if os.path.isdir(repo):
+        # If repo is a path on the file system, copy the directory
+        for file in files:
+            src_path = os.path.join(repo, file)
+            dst_path = f"models/tokenizers/{name}/{file}"
+            if os.path.isfile(dst_path):
+                logger.info(f"{name}: File {dst_path} already exists - skipping")
+                continue
+            if os.path.isfile(src_path):
+                shutil.copy2(src_path, dst_path)
+                logger.info(f"{name}: Copied {src_path} to {dst_path}")
+            else:
+                logger.warning(f"{name}: Source file {src_path} does not exist")
+    else:
+        # If repo is a URL, download the files
+        for file in files:
+            save_path = f"models/tokenizers/{name}/{file}"
+            if os.path.isfile(save_path):
+                logger.info(f"{name}: File {save_path} already exists - skipping")
+                continue
+            download_file_with_auth(f"{repo}/resolve/main/{file}", token, save_path)


 for model in models:
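With the new branch in download_model() above, the "repo" field of a models entry may point at a local directory instead of a Hugging Face URL; the tokenizer files are then copied with shutil.copy2() rather than downloaded. A minimal sketch of such an entry (the name and path are hypothetical):

```python
# Hypothetical entry: because "repo" is a directory on disk, download_model()
# takes the os.path.isdir(repo) branch and copies tokenizer.json / tokenizer_config.json
# from that directory instead of fetching them from huggingface.co.
models = [
    {"name": "my-local-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "/path/to/local/tokenizer-dir", },
]
```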
@@ -219,7 +246,7 @@ src_func = f"""
219
246
  logger.warning("** - the model has not been added to convert_hf_to_gguf_update.py yet")
220
247
  logger.warning("** - the pre-tokenization config has changed upstream")
221
248
  logger.warning("** Check your model files and convert_hf_to_gguf_update.py and update them accordingly.")
222
- logger.warning("** ref: https://github.com/ggerganov/llama.cpp/pull/6920")
249
+ logger.warning("** ref: https://github.com/ggml-org/llama.cpp/pull/6920")
223
250
  logger.warning("**")
224
251
  logger.warning(f"** chkhsh: {{chkhsh}}")
225
252
  logger.warning("**************************************************************************************")
@@ -294,11 +294,7 @@ class GGMLToGGUF:
         if self.vocab_override is not None:
             vo = self.vocab_override
             logger.info('* Adding vocab item(s)')
-<<<<<<< HEAD:convert-llama-ggml-to-gguf.py
-            for (idx, (vbytes, score, ttype)) in enumerate(vo.all_tokens()):
-=======
             for (_, (vbytes, score, ttype)) in enumerate(vo.all_tokens()):
->>>>>>> 1731d42:convert_llama_ggml_to_gguf.py
                 tokens.append(vbytes)
                 scores.append(score)
                 toktypes.append(ttype)
@@ -12,6 +12,7 @@ import json
 from math import prod
 from pathlib import Path
 from typing import TYPE_CHECKING, Any, Callable, Iterable, Iterator, Sequence, SupportsIndex, cast
+from transformers import AutoConfig

 import torch

@@ -225,12 +226,15 @@ def get_base_tensor_name(lora_tensor_name: str) -> str:
225
226
  base_name = lora_tensor_name.replace("base_model.model.", "")
226
227
  base_name = base_name.replace(".lora_A.weight", ".weight")
227
228
  base_name = base_name.replace(".lora_B.weight", ".weight")
229
+ # models produced by mergekit-extract-lora have token embeddings in the adapter
230
+ base_name = base_name.replace(".lora_embedding_A", ".weight")
231
+ base_name = base_name.replace(".lora_embedding_B", ".weight")
228
232
  return base_name
229
233
 
230
234
 
231
235
  def parse_args() -> argparse.Namespace:
232
236
  parser = argparse.ArgumentParser(
233
- description="Convert a huggingface PEFT LoRA adapter to a GGML compatible file")
237
+ description="Convert a Hugging Face PEFT LoRA adapter to a GGUF file")
234
238
  parser.add_argument(
235
239
  "--outfile", type=Path,
236
240
  help="path to write to; default: based on input. {ftype} will be replaced by the outtype.",
@@ -256,17 +260,27 @@ def parse_args() -> argparse.Namespace:
256
260
  help="only print out what will be done, without writing any new files",
257
261
  )
258
262
  parser.add_argument(
259
- "--base", type=Path, required=True,
260
- help="directory containing base model file",
263
+ "--base", type=Path,
264
+ help="directory containing Hugging Face model config files (config.json, tokenizer.json) for the base model that the adapter is based on - only config is needed, actual model weights are not required. If base model is unspecified, it will be loaded from Hugging Face hub based on the adapter config",
265
+ )
266
+ parser.add_argument(
267
+ "--base-model-id", type=str,
268
+ help="the model ID of the base model, if it is not available locally or in the adapter config. If specified, it will ignore --base and load the base model config from the Hugging Face hub (Example: 'meta-llama/Llama-3.2-1B-Instruct')",
261
269
  )
262
270
  parser.add_argument(
263
271
  "lora_path", type=Path,
264
- help="directory containing LoRA adapter file",
272
+ help="directory containing Hugging Face PEFT LoRA config (adapter_model.json) and weights (adapter_model.safetensors or adapter_model.bin)",
265
273
  )
266
274
 
267
275
  return parser.parse_args()
268
276
 
269
277
 
278
+ def load_hparams_from_hf(hf_model_id: str) -> dict[str, Any]:
279
+ # normally, adapter does not come with base model config, we need to load it from AutoConfig
280
+ config = AutoConfig.from_pretrained(hf_model_id)
281
+ return config.to_dict()
282
+
283
+
270
284
  if __name__ == '__main__':
271
285
  args = parse_args()
272
286
  logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
@@ -281,8 +295,9 @@ if __name__ == '__main__':

     ftype = ftype_map[args.outtype]

-    dir_base_model: Path = args.base
+    dir_base_model: Path | None = args.base
     dir_lora: Path = args.lora_path
+    base_model_id: str | None = args.base_model_id
     lora_config = dir_lora / "adapter_config.json"
     input_model = dir_lora / "adapter_model.safetensors"

@@ -301,9 +316,32 @@
         input_model = os.path.join(dir_lora, "adapter_model.bin")
         lora_model = torch.load(input_model, map_location="cpu", weights_only=True)

+    # load LoRA config
+    with open(lora_config, "r") as f:
+        lparams: dict[str, Any] = json.load(f)
+
     # load base model
-    logger.info(f"Loading base model: {dir_base_model.name}")
-    hparams = Model.load_hparams(dir_base_model)
+    if base_model_id is not None:
+        logger.info(f"Loading base model from Hugging Face: {base_model_id}")
+        hparams = load_hparams_from_hf(base_model_id)
+    elif dir_base_model is None:
+        if "base_model_name_or_path" in lparams:
+            model_id = lparams["base_model_name_or_path"]
+            logger.info(f"Loading base model from Hugging Face: {model_id}")
+            try:
+                hparams = load_hparams_from_hf(model_id)
+            except OSError as e:
+                logger.error(f"Failed to load base model config: {e}")
+                logger.error("Please try downloading the base model and add its path to --base")
+                sys.exit(1)
+        else:
+            logger.error("'base_model_name_or_path' is not found in adapter_config.json")
+            logger.error("Base model config is required. Please download the base model and add its path to --base")
+            sys.exit(1)
+    else:
+        logger.info(f"Loading base model: {dir_base_model.name}")
+        hparams = Model.load_hparams(dir_base_model)
+
     with torch.inference_mode():
         try:
             model_class = Model.from_model_architecture(hparams["architectures"][0])
@@ -323,13 +361,19 @@ if __name__ == '__main__':
                 self.dir_model_card = dir_lora_model
                 self.lora_alpha = float(lora_alpha)

+            def set_vocab(self):
+                pass
+
             def set_type(self):
                 self.gguf_writer.add_type(gguf.GGUFType.ADAPTER)
                 self.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")

             def set_gguf_parameters(self):
                 self.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, self.lora_alpha)
-                super().set_gguf_parameters()
+
+            def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
+                # Never add extra tensors (e.g. rope_freqs) for LoRA adapters
+                return ()

             def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
                 tensor_map: dict[str, PartialLoraTensor] = {}
@@ -338,12 +382,20 @@ if __name__ == '__main__':
                     if self.lazy:
                         tensor = LazyTorchTensor.from_eager(tensor)
                     base_name = get_base_tensor_name(name)
-                    is_lora_a = ".lora_A.weight" in name
-                    is_lora_b = ".lora_B.weight" in name
+                    # note: mergekit-extract-lora also adds token embeddings to the adapter
+                    is_lora_a = ".lora_A.weight" in name or ".lora_embedding_A" in name
+                    is_lora_b = ".lora_B.weight" in name or ".lora_embedding_B" in name
                     if not is_lora_a and not is_lora_b:
                         if ".base_layer.weight" in name:
                             continue
+                        # mergekit-extract-lora add these layernorm to the adapter, we need to keep them
+                        if "_layernorm" in name or ".norm" in name:
+                            yield (base_name, tensor)
+                            continue
                         logger.error(f"Unexpected name '{name}': Not a lora_A or lora_B tensor")
+                        if ".embed_tokens.weight" in name or ".lm_head.weight" in name:
+                            logger.error("Embeddings is present in the adapter. This can be due to new tokens added during fine tuning")
+                            logger.error("Please refer to https://github.com/ggml-org/llama.cpp/pull/9948")
                         sys.exit(1)

                     if base_name in tensor_map:
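For reference, the updated get_tensors() routes tensors from mergekit-extract-lora adapters as follows: lora_A/lora_B and lora_embedding_A/lora_embedding_B halves are collected for conversion, layernorm weights are passed through unchanged, and full embedding matrices abort the conversion with a pointer to the llama.cpp PR. A rough illustration with hypothetical tensor names:

```python
# Hypothetical adapter tensor names and how the updated get_tensors() treats them.
routing = {
    "base_model.model.model.layers.0.self_attn.q_proj.lora_A.weight": "collected as a LoRA A half",
    "base_model.model.model.embed_tokens.lora_embedding_A": "collected as a LoRA A half (token embeddings)",
    "base_model.model.model.layers.0.input_layernorm.weight": "yielded unchanged ('_layernorm' match)",
    "base_model.model.model.embed_tokens.weight": "full embedding matrix -> error and sys.exit(1)",
}
for tensor_name, handling in routing.items():
    print(f"{tensor_name}: {handling}")
```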
@@ -363,17 +415,32 @@ if __name__ == '__main__':
                         yield (name, cast(torch.Tensor, LoraTorchTensor(tensor.A, tensor.B)))

             def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
-                dest = super().modify_tensors(data_torch, name, bid)
+                dest = list(super().modify_tensors(data_torch, name, bid))
+                # some archs may have the same tensor for lm_head and output (tie word embeddings)
+                # in this case, adapters targeting lm_head will fail when using llama-export-lora
+                # therefore, we ignore them for now
+                # see: https://github.com/ggml-org/llama.cpp/issues/9065
+                if name == "lm_head.weight" and len(dest) == 0:
+                    raise ValueError("lm_head is present in adapter, but is ignored in base model")
                 for dest_name, dest_data in dest:
+                    # mergekit-extract-lora add these layernorm to the adapter
+                    if "_norm" in dest_name:
+                        assert dest_data.dim() == 1
+                        yield (dest_name, dest_data)
+                        continue
+
+                    # otherwise, we must get the lora_A and lora_B tensors
                     assert isinstance(dest_data, LoraTorchTensor)
                     lora_a, lora_b = dest_data.get_lora_A_B()

+                    # note: mergekit-extract-lora flip and transpose A and B
+                    # here we only need to transpose token_embd.lora_a, see llm_build_inp_embd()
+                    if "token_embd.weight" in dest_name:
+                        lora_a = lora_a.T
+
                     yield (dest_name + ".lora_a", lora_a)
                     yield (dest_name + ".lora_b", lora_b)

-        with open(lora_config, "r") as f:
-            lparams: dict[str, Any] = json.load(f)
-
         alpha: float = lparams["lora_alpha"]

         model_instance = LoraModel(
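The lm_head guard above exists because some architectures tie the output projection to the token embeddings, so the base model has no separate lm_head tensor for the adapter to attach to, and llama-export-lora would fail on such an adapter. A quick way to spot this from the base config dict the script already loads (a sketch, not part of the converter):

```python
# Sketch: "tie_word_embeddings" is the standard Hugging Face config flag for a tied lm_head.
if hparams.get("tie_word_embeddings", False):
    print("Base model ties lm_head to the token embeddings; an adapter targeting lm_head will be rejected.")
```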
@@ -386,6 +453,7 @@ if __name__ == '__main__':
             dry_run=args.dry_run,
             dir_lora_model=dir_lora,
             lora_alpha=alpha,
+            hparams=hparams,
         )

         logger.info("Exporting model...")