bigdl-core-cpp 2.7.0b20250630__py3-none-win_amd64.whl → 2.7.0b20250702__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. bigdl/cpp/convert_hf_to_gguf.py +1987 -558
  2. bigdl/cpp/convert_hf_to_gguf_update.py +131 -67
  3. bigdl/cpp/convert_lora_to_gguf.py +3 -3
  4. bigdl/cpp/gguf-py/gguf/constants.py +546 -16
  5. bigdl/cpp/gguf-py/gguf/gguf_reader.py +57 -6
  6. bigdl/cpp/gguf-py/gguf/gguf_writer.py +119 -7
  7. bigdl/cpp/gguf-py/gguf/lazy.py +10 -0
  8. bigdl/cpp/gguf-py/gguf/metadata.py +28 -8
  9. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +461 -48
  10. bigdl/cpp/gguf-py/gguf/utility.py +195 -0
  11. bigdl/cpp/gguf-py/gguf/vocab.py +6 -1
  12. bigdl/cpp/libs/llama_cpp/ggml-base.dll +0 -0
  13. bigdl/cpp/libs/llama_cpp/ggml-cpu.dll +0 -0
  14. bigdl/cpp/libs/llama_cpp/ggml-sycl.dll +0 -0
  15. bigdl/cpp/libs/llama_cpp/ggml.dll +0 -0
  16. bigdl/cpp/libs/llama_cpp/llama-batched.exe +0 -0
  17. bigdl/cpp/libs/llama_cpp/llama-bench.exe +0 -0
  18. bigdl/cpp/libs/llama_cpp/llama-cli.exe +0 -0
  19. bigdl/cpp/libs/llama_cpp/llama-embedding.exe +0 -0
  20. bigdl/cpp/libs/llama_cpp/llama-gemma3-cli.exe +0 -0
  21. bigdl/cpp/libs/llama_cpp/llama-gguf.exe +0 -0
  22. bigdl/cpp/libs/llama_cpp/llama-llava-cli.exe +0 -0
  23. bigdl/cpp/libs/llama_cpp/llama-lookup.exe +0 -0
  24. bigdl/cpp/libs/llama_cpp/llama-ls-sycl-device.exe +0 -0
  25. bigdl/cpp/libs/llama_cpp/llama-minicpmv-cli.exe +0 -0
  26. bigdl/cpp/libs/llama_cpp/llama-perplexity.exe +0 -0
  27. bigdl/cpp/libs/llama_cpp/llama-quantize.exe +0 -0
  28. bigdl/cpp/libs/llama_cpp/llama-server.exe +0 -0
  29. bigdl/cpp/libs/llama_cpp/llama-simple.exe +0 -0
  30. bigdl/cpp/libs/llama_cpp/llama-speculative.exe +0 -0
  31. bigdl/cpp/libs/llama_cpp/llama-tokenize.exe +0 -0
  32. bigdl/cpp/libs/llama_cpp/llama.dll +0 -0
  33. bigdl/cpp/libs/ollama/ggml-base.dll +0 -0
  34. bigdl/cpp/libs/ollama/ggml-cpu.dll +0 -0
  35. bigdl/cpp/libs/ollama/ggml-sycl.dll +0 -0
  36. bigdl/cpp/libs/ollama/ggml.dll +0 -0
  37. bigdl/cpp/libs/ollama/llama.dll +0 -0
  38. bigdl/cpp/libs/ollama/llava_shared.dll +0 -0
  39. bigdl/cpp/libs/ollama/mtmd_shared.dll +0 -0
  40. bigdl/cpp/libs/ollama/ollama-lib.exe +0 -0
  41. bigdl/cpp/libs/ollama/ollama.exe +0 -0
  42. {bigdl_core_cpp-2.7.0b20250630.dist-info → bigdl_core_cpp-2.7.0b20250702.dist-info}/METADATA +1 -1
  43. bigdl_core_cpp-2.7.0b20250702.dist-info/RECORD +56 -0
  44. bigdl/cpp/libs/llama_cpp/llava_shared.dll +0 -0
  45. bigdl_core_cpp-2.7.0b20250630.dist-info/RECORD +0 -57
  46. {bigdl_core_cpp-2.7.0b20250630.data → bigdl_core_cpp-2.7.0b20250702.data}/scripts/init-llama-cpp.bat +0 -0
  47. {bigdl_core_cpp-2.7.0b20250630.data → bigdl_core_cpp-2.7.0b20250702.data}/scripts/init-llama-cpp.ps1 +0 -0
  48. {bigdl_core_cpp-2.7.0b20250630.data → bigdl_core_cpp-2.7.0b20250702.data}/scripts/init-ollama.bat +0 -0
  49. {bigdl_core_cpp-2.7.0b20250630.dist-info → bigdl_core_cpp-2.7.0b20250702.dist-info}/WHEEL +0 -0
  50. {bigdl_core_cpp-2.7.0b20250630.dist-info → bigdl_core_cpp-2.7.0b20250702.dist-info}/top_level.txt +0 -0
--- a/bigdl/cpp/convert_hf_to_gguf_update.py
+++ b/bigdl/cpp/convert_hf_to_gguf_update.py
@@ -1,28 +1,6 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-# This script downloads the tokenizer models of the specified models from Huggingface and
-# generates the get_vocab_base_pre() function for convert_hf_to_gguf.py
-#
-# This is necessary in order to analyze the type of pre-tokenizer used by the model and
-# provide the necessary information to llama.cpp via the GGUF header in order to implement
-# the same pre-tokenizer.
-#
-# ref: https://github.com/ggml-org/llama.cpp/pull/6920
-#
-# Instructions:
-#
-# - Add a new model to the "models" list
-# - Run the script with your huggingface token:
-#
-#   python3 convert_hf_to_gguf_update.py <huggingface_token>
-#
-# - The convert_hf_to_gguf.py script will have had its get_vocab_base_pre() function updated
-# - Update llama.cpp with the new pre-tokenizer if necessary
-#
-# TODO: generate tokenizer tests for llama.cpp
-#
-
 import logging
 import os
 import pathlib
@@ -32,6 +10,7 @@ import requests
 import sys
 import json
 import shutil
+import argparse
 
 from hashlib import sha256
 from enum import IntEnum, auto
@@ -41,6 +20,11 @@ logging.basicConfig(level=logging.DEBUG)
 logger = logging.getLogger("convert_hf_to_gguf_update")
 sess = requests.Session()
 
+convert_py_pth = pathlib.Path("convert_hf_to_gguf.py")
+convert_py = convert_py_pth.read_text(encoding="utf-8")
+hf_token_pth = pathlib.Path.home() / ".cache" / "huggingface" / "token"
+hf_token = hf_token_pth.read_text(encoding="utf-8").strip() if hf_token_pth.exists() else None
+
 
 class TOKENIZER_TYPE(IntEnum):
     SPM = auto()
@@ -49,20 +33,49 @@ class TOKENIZER_TYPE(IntEnum):
     UGM = auto()
 
 
+DOC_STRING = """
+This script downloads the tokenizer models of the specified models from Huggingface and
+generates the get_vocab_base_pre() function for convert_hf_to_gguf.py
+
+/!\\ It is intended to be used by contributors and is not meant to be run by end users
+
+This is necessary in order to analyze the type of pre-tokenizer used by the model and
+provide the necessary information to llama.cpp via the GGUF header in order to implement
+the same pre-tokenizer.
+
+ref: https://github.com/ggml-org/llama.cpp/pull/6920
+
+Instructions:
+
+- Add a new model to the "models" list
+- Run the script with your huggingface token
+    By default, token will be read from ~/.cache/huggingface/token
+- The convert_hf_to_gguf.py script will have had its get_vocab_base_pre() function updated
+- Update llama.cpp with the new pre-tokenizer if necessary
+"""
+# TODO: generate tokenizer tests for llama.cpp
+
+parser = argparse.ArgumentParser(description=DOC_STRING, formatter_class=argparse.RawTextHelpFormatter)
+parser.add_argument(
+    "--full", action="store_true",
+    help="download full list of models - make sure you have access to all of them",
+)
+parser.add_argument(
+    "hf_token",
+    help="optional HF token",
+    nargs="?",
+)
+args = parser.parse_args()
+hf_token = args.hf_token if args.hf_token is not None else hf_token
+
+if hf_token is None:
+    logger.error("HF token is required. Please provide it as an argument or set it in ~/.cache/huggingface/token")
+    sys.exit(1)
+
 # TODO: this string has to exercise as much pre-tokenizer functionality as possible
 # will be updated with time - contributions welcome
 CHK_TXT = '\n \n\n \n\n\n \t \t\t \t\n  \n   \n    \n     \n🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български \'\'\'\'\'\'```````\"\"\"\"......!!!!!!?????? I\'ve been \'told he\'s there, \'RE you sure? \'M not sure I\'ll make it, \'D you like some tea? We\'Ve a\'lL'
 
-if len(sys.argv) == 2:
-    token = sys.argv[1]
-    if not token.startswith("hf_"):
-        logger.info("Huggingface token seems invalid")
-        logger.info("Usage: python convert_hf_to_gguf_update.py <huggingface_token>")
-        sys.exit(1)
-else:
-    logger.info("Usage: python convert_hf_to_gguf_update.py <huggingface_token>")
-    sys.exit(1)
-
 # TODO: add models here, base models preferred
 models = [
     {"name": "llama-spm", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/meta-llama/Llama-2-7b-hf", },
@@ -103,12 +116,27 @@ models = [
     {"name": "exaone", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct", },
     {"name": "phi-2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/microsoft/phi-2", },
     {"name": "chameleon", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/facebook/chameleon-7b", },
-    {"name": "minerva-7b", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", },
     {"name": "roberta-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sentence-transformers/stsb-roberta-base"},
     {"name": "gigachat", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ai-sage/GigaChat-20B-A3B-instruct"},
     {"name": "megrez", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Infinigence/Megrez-3B-Instruct"},
     {"name": "deepseek-v3", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-V3"},
     {"name": "deepseek-r1-qwen", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"},
+    {"name": "gpt-4o", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Xenova/gpt-4o", },
+    {"name": "superbpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/UW/OLMo2-8B-SuperBPE-t180k", },
+    {"name": "trillion", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/trillionlabs/Trillion-7B-preview", },
+    {"name": "bailingmoe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inclusionAI/Ling-lite", },
+    {"name": "llama4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct", },
+    {"name": "pixtral", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistral-community/pixtral-12b", },
+    {"name": "seed-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base", },
+]
+
+# some models are known to be broken upstream, so we will skip them as exceptions
+pre_computed_hashes = [
+    # chatglm-bpe has 2 hashes, why?
+    {"name": "chatglm-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-chat", "chkhsh": "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b"},
+    {"name": "chatglm-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-chat", "chkhsh": "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516"},
+    {"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-hf", "chkhsh": "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2"},
+    {"name": "minerva-7b", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", "chkhsh": "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35"},
 ]
 
 
@@ -131,6 +159,10 @@ def download_model(model):
 
     files = ["config.json", "tokenizer.json", "tokenizer_config.json"]
 
+    if name == "gpt-4o":
+        # Xenova/gpt-4o is tokenizer-only, it does not contain config.json
+        files = ["tokenizer.json", "tokenizer_config.json"]
+
     if tokt == TOKENIZER_TYPE.SPM:
         files.append("tokenizer.model")
 
@@ -157,9 +189,29 @@ def download_model(model):
         if os.path.isfile(save_path):
             logger.info(f"{name}: File {save_path} already exists - skipping")
             continue
-        download_file_with_auth(f"{repo}/resolve/main/{file}", token, save_path)
+        download_file_with_auth(f"{repo}/resolve/main/{file}", hf_token, save_path)
+
+
+# get list of existing models and chkhsh from the convert_hf_to_gguf.py file
+# returns mapping res --> chkhsh
+def get_existing_models(convert_py):
+    pattern = r'if chkhsh == "([a-f0-9]{64})":\s*\n\s*.*\s*res = "([^"]+)"'
+    matches = re.findall(pattern, convert_py)
+    output = {}
+    for chkhsh, res in matches:
+        output[res] = chkhsh
+    return output
 
 
+existing_models = {}
+all_models = models.copy()
+if not args.full:
+    # Filter out models that already exist in convert_hf_to_gguf.py
+    existing_models = get_existing_models(convert_py)
+    all_models = models.copy()
+    models = [model for model in all_models if model["name"] not in existing_models]
+
+logging.info(f"Downloading {len(models)} models...")
 for model in models:
     try:
         download_model(model)
@@ -170,9 +222,10 @@ for model in models:
 # generate the source code for the convert_hf_to_gguf.py:get_vocab_base_pre() function:
 
 src_ifs = ""
-for model in models:
+for model in [*all_models, *pre_computed_hashes]:
     name = model["name"]
     tokt = model["tokt"]
+    chkhsh = model.get("chkhsh")
 
     if tokt == TOKENIZER_TYPE.SPM or tokt == TOKENIZER_TYPE.UGM:
         continue
@@ -183,35 +236,44 @@ for model in models:
         continue
 
     # create the tokenizer
-    try:
-        if name == "t5":
-            tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}", use_fast=False)
-        else:
-            tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
-    except OSError as e:
-        logger.error(f"Error loading tokenizer for model {name}. The model may not exist or is not accessible with the provided token. Error: {e}")
-        continue  # Skip to the next model if the tokenizer can't be loaded
-
-    chktok = tokenizer.encode(CHK_TXT)
-    chkhsh = sha256(str(chktok).encode()).hexdigest()
-
-    logger.info(f"model: {name}")
-    logger.info(f"tokt: {tokt}")
-    logger.info(f"repo: {model['repo']}")
-    logger.info(f"chktok: {chktok}")
-    logger.info(f"chkhsh: {chkhsh}")
-
-    # print the "pre_tokenizer" content from the tokenizer.json
-    with open(f"models/tokenizers/{name}/tokenizer.json", "r", encoding="utf-8") as f:
-        cfg = json.load(f)
-        normalizer = cfg["normalizer"]
-        logger.info("normalizer: " + json.dumps(normalizer, indent=4))
-        pre_tokenizer = cfg["pre_tokenizer"]
-        logger.info("pre_tokenizer: " + json.dumps(pre_tokenizer, indent=4))
-        if "ignore_merges" in cfg["model"]:
-            logger.info("ignore_merges: " + json.dumps(cfg["model"]["ignore_merges"], indent=4))
-
-    logger.info("")
+    if chkhsh is not None:
+        # if the model has a pre-computed hash, use it
+        logger.info(f"Using pre-computed hash for model {name}: {chkhsh}")
+    elif name in existing_models:
+        # if the model already exists in convert_hf_to_gguf.py, skip compute hash
+        chkhsh = existing_models[name]
+    else:
+        # otherwise, compute the hash of the tokenizer
+        try:
+            logger.info(f"Loading tokenizer from {f'models/tokenizers/{name}'}...")
+            if name == "t5":
+                tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}", use_fast=False)
+            else:
+                tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
+        except OSError as e:
+            logger.error(f"Error loading tokenizer for model {name}. The model may not exist or is not accessible with the provided token. Error: {e}")
+            continue  # Skip to the next model if the tokenizer can't be loaded
+
+        chktok = tokenizer.encode(CHK_TXT)
+        chkhsh = sha256(str(chktok).encode()).hexdigest()
+
+        logger.info(f"model: {name}")
+        logger.info(f"tokt: {tokt}")
+        logger.info(f"repo: {model['repo']}")
+        logger.info(f"chktok: {chktok}")
+        logger.info(f"chkhsh: {chkhsh}")
+
+        # print the "pre_tokenizer" content from the tokenizer.json
+        with open(f"models/tokenizers/{name}/tokenizer.json", "r", encoding="utf-8") as f:
+            cfg = json.load(f)
+            normalizer = cfg["normalizer"]
+            logger.info("normalizer: " + json.dumps(normalizer, indent=4))
+            pre_tokenizer = cfg["pre_tokenizer"]
+            logger.info("pre_tokenizer: " + json.dumps(pre_tokenizer, indent=4))
+            if "ignore_merges" in cfg["model"]:
+                logger.info("ignore_merges: " + json.dumps(cfg["model"]["ignore_merges"], indent=4))
+
+        logger.info("")
 
     src_ifs += f"        if chkhsh == \"{chkhsh}\":\n"
     src_ifs += f"            # ref: {model['repo']}\n"
@@ -259,8 +321,6 @@ src_func = f"""
259
321
  return res
260
322
  """
261
323
 
262
- convert_py_pth = pathlib.Path("convert_hf_to_gguf.py")
263
- convert_py = convert_py_pth.read_text(encoding="utf-8")
264
324
  convert_py = re.sub(
265
325
  r"(# Marker: Start get_vocab_base_pre)(.+?)( +# Marker: End get_vocab_base_pre)",
266
326
  lambda m: m.group(1) + src_func + m.group(3),
@@ -276,7 +336,7 @@ logger.info("+++ convert_hf_to_gguf.py was updated")
276
336
 
277
337
  tests = [
278
338
  "ied 4 ½ months",
279
- "Führer",
339
+ "Äpfel",
280
340
  "",
281
341
  " ",
282
342
  " ",
@@ -355,6 +415,10 @@ for model in models:
         logger.error(f"Failed to load tokenizer for model {name}. Error: {e}")
         continue  # Skip this model and continue with the next one in the loop
 
+    if not os.path.exists(f"models/ggml-vocab-{name}.gguf"):
+        logger.info(f"Skip vocab files for model {name}, no GGUF file found")
+        continue
+
     with open(f"models/ggml-vocab-{name}.gguf.inp", "w", encoding="utf-8") as f:
         for text in tests:
             f.write(f"{text}")
--- a/bigdl/cpp/convert_lora_to_gguf.py
+++ b/bigdl/cpp/convert_lora_to_gguf.py
@@ -24,7 +24,7 @@ if 'NO_LOCAL_GGUF' not in os.environ:
 import gguf
 
 # reuse model definitions from convert_hf_to_gguf.py
-from convert_hf_to_gguf import LazyTorchTensor, Model
+from convert_hf_to_gguf import LazyTorchTensor, ModelBase
 
 logger = logging.getLogger("lora-to-gguf")
 
@@ -340,11 +340,11 @@ if __name__ == '__main__':
             sys.exit(1)
     else:
         logger.info(f"Loading base model: {dir_base_model.name}")
-        hparams = Model.load_hparams(dir_base_model)
+        hparams = ModelBase.load_hparams(dir_base_model)
 
     with torch.inference_mode():
         try:
-            model_class = Model.from_model_architecture(hparams["architectures"][0])
+            model_class = ModelBase.from_model_architecture(hparams["architectures"][0])
         except NotImplementedError:
             logger.error(f"Model {hparams['architectures'][0]} is not supported")
             sys.exit(1)