bigdl-core-cpp 2.5.0b20240507__py3-none-win_amd64.whl → 2.5.0b20240509__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. bigdl/cpp/convert-hf-to-gguf.py +3177 -0
  2. bigdl/cpp/convert.py +36 -24
  3. bigdl/cpp/gguf-py/gguf/constants.py +19 -3
  4. bigdl/cpp/gguf-py/gguf/gguf_reader.py +16 -3
  5. bigdl/cpp/gguf-py/gguf/gguf_writer.py +12 -1
  6. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +2 -0
  7. bigdl/cpp/gguf-py/gguf/vocab.py +13 -29
  8. bigdl/cpp/libs/baby-llama.exe +0 -0
  9. bigdl/cpp/libs/batched-bench.exe +0 -0
  10. bigdl/cpp/libs/batched.exe +0 -0
  11. bigdl/cpp/libs/beam-search.exe +0 -0
  12. bigdl/cpp/libs/benchmark.exe +0 -0
  13. bigdl/cpp/libs/convert-llama2c-to-ggml.exe +0 -0
  14. bigdl/cpp/libs/embedding.exe +0 -0
  15. bigdl/cpp/libs/export-lora.exe +0 -0
  16. bigdl/cpp/libs/finetune.exe +0 -0
  17. bigdl/cpp/libs/ggml_shared.dll +0 -0
  18. bigdl/cpp/libs/gguf.exe +0 -0
  19. bigdl/cpp/libs/gritlm.exe +0 -0
  20. bigdl/cpp/libs/imatrix.exe +0 -0
  21. bigdl/cpp/libs/infill.exe +0 -0
  22. bigdl/cpp/libs/llama-bench.exe +0 -0
  23. bigdl/cpp/libs/llama.dll +0 -0
  24. bigdl/cpp/libs/llava-cli.exe +0 -0
  25. bigdl/cpp/libs/llava_shared.dll +0 -0
  26. bigdl/cpp/libs/lookahead.exe +0 -0
  27. bigdl/cpp/libs/lookup.exe +0 -0
  28. bigdl/cpp/libs/ls-sycl-device.exe +0 -0
  29. bigdl/cpp/libs/main.exe +0 -0
  30. bigdl/cpp/libs/ollama.exe +0 -0
  31. bigdl/cpp/libs/parallel.exe +0 -0
  32. bigdl/cpp/libs/passkey.exe +0 -0
  33. bigdl/cpp/libs/perplexity.exe +0 -0
  34. bigdl/cpp/libs/q8dot.exe +0 -0
  35. bigdl/cpp/libs/quantize-stats.exe +0 -0
  36. bigdl/cpp/libs/quantize.exe +0 -0
  37. bigdl/cpp/libs/save-load-state.exe +0 -0
  38. bigdl/cpp/libs/server.exe +0 -0
  39. bigdl/cpp/libs/simple.exe +0 -0
  40. bigdl/cpp/libs/speculative.exe +0 -0
  41. bigdl/cpp/libs/tokenize.exe +0 -0
  42. bigdl/cpp/libs/train-text-from-scratch.exe +0 -0
  43. bigdl/cpp/libs/vdot.exe +0 -0
  44. {bigdl_core_cpp-2.5.0b20240507.data → bigdl_core_cpp-2.5.0b20240509.data}/scripts/init-llama-cpp.bat +1 -0
  45. {bigdl_core_cpp-2.5.0b20240507.dist-info → bigdl_core_cpp-2.5.0b20240509.dist-info}/METADATA +1 -1
  46. bigdl_core_cpp-2.5.0b20240509.dist-info/RECORD +55 -0
  47. bigdl_core_cpp-2.5.0b20240507.dist-info/RECORD +0 -54
  48. {bigdl_core_cpp-2.5.0b20240507.data → bigdl_core_cpp-2.5.0b20240509.data}/scripts/init-llama-cpp.ps1 +0 -0
  49. {bigdl_core_cpp-2.5.0b20240507.data → bigdl_core_cpp-2.5.0b20240509.data}/scripts/init-ollama.bat +0 -0
  50. {bigdl_core_cpp-2.5.0b20240507.dist-info → bigdl_core_cpp-2.5.0b20240509.dist-info}/WHEEL +0 -0
  51. {bigdl_core_cpp-2.5.0b20240507.dist-info → bigdl_core_cpp-2.5.0b20240509.dist-info}/top_level.txt +0 -0
bigdl/cpp/convert.py CHANGED
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 from __future__ import annotations
 
+import logging
 import argparse
 import concurrent.futures
 import enum
@@ -35,6 +36,8 @@ import gguf
 if TYPE_CHECKING:
     from typing_extensions import Self, TypeAlias
 
+logger = logging.getLogger("convert")
+
 if hasattr(faulthandler, 'register') and hasattr(signal, 'SIGUSR1'):
     faulthandler.register(signal.SIGUSR1)
 
@@ -643,7 +646,6 @@ class LlamaHfVocab(Vocab):
 
 
 def permute(weights: NDArray, n_head: int, n_head_kv: int) -> NDArray:
-    # print( "permute debug " + str(weights.shape[0]) + " x " + str(weights.shape[1]) + " nhead " + str(n_head) + " nheadkv " + str(n_kv_head) )
     if n_head_kv is not None and n_head != n_head_kv:
         n_head = n_head_kv
     return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
@@ -1033,12 +1035,12 @@ def check_vocab_size(params: Params, vocab: BaseVocab, pad_vocab: bool = False)
 
     # Check for a vocab size mismatch
     if params.n_vocab == vocab.vocab_size:
-        print("Ignoring added_tokens.json since model matches vocab size without it.")
+        logger.warning("Ignoring added_tokens.json since model matches vocab size without it.")
        return
 
     if pad_vocab and params.n_vocab > vocab.vocab_size:
         pad_count = params.n_vocab - vocab.vocab_size
-        print(
+        logger.debug(
             f"Padding vocab with {pad_count} token(s) - <dummy00001> through <dummy{pad_count:05}>"
         )
         for i in range(1, pad_count + 1):
@@ -1166,7 +1168,7 @@ class OutputFile:
             elapsed = time.time() - start
             size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
             padi = len(str(len(model)))
-            print(
+            logger.info(
                 f"[{i + 1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}"
             )
             self.gguf.write_tensor_data(ndarray)
@@ -1281,12 +1283,12 @@ def convert_model_names(model: LazyModel, params: Params, skip_unknown: bool) ->
    # HF models permut or pack some of the tensors, so we need to undo that
    for i in itertools.count():
        if f"model.layers.{i}.self_attn.q_proj.weight" in model:
-            print(f"Permuting layer {i}")
+            logger.debug(f"Permuting layer {i}")
            tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head, params.n_head)
            tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head, params.n_head_kv)
            # tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
        elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
-            print(f"Unpacking and permuting layer {i}")
+            logger.debug(f"Unpacking and permuting layer {i}")
            tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head, params.n_head)
            tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head, params.n_head_kv)
            tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = part_lazy (model[f"model.layers.{i}.self_attn.W_pack.weight"], 2)
@@ -1299,15 +1301,15 @@ def convert_model_names(model: LazyModel, params: Params, skip_unknown: bool) ->
        tensor_type, name_new = tmap.get_type_and_name(name, try_suffixes = (".weight", ".bias")) or (None, None)
        if name_new is None:
            if skip_unknown:
-                print(f"Unexpected tensor name: {name} - skipping")
+                logger.warning(f"Unexpected tensor name: {name} - skipping")
                continue
            raise ValueError(f"Unexpected tensor name: {name}. Use --skip-unknown to ignore it (e.g. LLaVA)")
 
        if tensor_type in should_skip:
-            print(f"skipping tensor {name_new}")
+            logger.debug(f"skipping tensor {name_new}")
            continue
 
-        print(f"{name:48s} -> {name_new:40s} | {lazy_tensor.data_type.name:6s} | {lazy_tensor.shape}")
+        logger.debug(f"{name:48s} -> {name_new:40s} | {lazy_tensor.data_type.name:6s} | {lazy_tensor.shape}")
        out[name_new] = lazy_tensor
 
    return out
@@ -1372,7 +1374,7 @@ def load_some_model(path: Path) -> ModelPlus:
    paths = find_multifile_paths(path)
    models_plus: list[ModelPlus] = []
    for path in paths:
-        print(f"Loading model file {path}")
+        logger.info(f"Loading model file {path}")
        models_plus.append(lazy_load_file(path))
 
    model_plus = merge_multifile_models(models_plus)
@@ -1413,7 +1415,7 @@ class VocabFactory:
        else:
            raise FileNotFoundError(f"Could not find a tokenizer matching any of {vocab_types}")
 
-        print(f"Loaded vocab file {vocab.fname_tokenizer!r}, type {vocab.name!r}")
+        logger.info(f"Loaded vocab file {vocab.fname_tokenizer!r}, type {vocab.name!r}")
        return vocab
 
    def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) -> tuple[BaseVocab, gguf.SpecialVocab]:
@@ -1438,19 +1440,19 @@ def default_outfile(model_paths: list[Path], file_type: GGMLFileType) -> Path:
    }[file_type]
    ret = model_paths[0].parent / f"ggml-model-{namestr}.gguf"
    if ret in model_paths:
-        sys.stderr.write(
+        logger.error(
            f"Error: Default output path ({ret}) would overwrite the input. "
-            "Please explicitly specify a path using --outfile.\n")
+            "Please explicitly specify a path using --outfile.")
        sys.exit(1)
    return ret
 
 
 def do_dump_model(model_plus: ModelPlus) -> None:
-    print(f"model_plus.paths = {model_plus.paths!r}")
-    print(f"model_plus.format = {model_plus.format!r}")
-    print(f"model_plus.vocab = {model_plus.vocab!r}")
+    print(f"model_plus.paths = {model_plus.paths!r}") # noqa: NP100
+    print(f"model_plus.format = {model_plus.format!r}") # noqa: NP100
+    print(f"model_plus.vocab = {model_plus.vocab!r}") # noqa: NP100
    for name, lazy_tensor in model_plus.model.items():
-        print(f"{name}: shape={lazy_tensor.shape} type={lazy_tensor.data_type}; {lazy_tensor.description}")
+        print(f"{name}: shape={lazy_tensor.shape} type={lazy_tensor.data_type}; {lazy_tensor.description}") # noqa: NP100
 
 
 def main(args_in: list[str] | None = None) -> None:
@@ -1473,8 +1475,18 @@ def main(args_in: list[str] | None = None) -> None:
    parser.add_argument("--big-endian", action="store_true", help="model is executed on big endian machine")
    parser.add_argument("--pad-vocab", action="store_true", help="add pad tokens when model vocab expects more than tokenizer metadata provides")
    parser.add_argument("--skip-unknown", action="store_true", help="skip unknown tensor names instead of failing")
+    parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
 
    args = parser.parse_args(args_in)
+
+    if args.verbose:
+        logging.basicConfig(level=logging.DEBUG)
+    elif args.dump_single or args.dump:
+        # Avoid printing anything besides the dump output
+        logging.basicConfig(level=logging.WARNING)
+    else:
+        logging.basicConfig(level=logging.INFO)
+
    if args.no_vocab and args.vocab_only:
        raise ValueError("--vocab-only does not make sense with --no-vocab")
 
@@ -1491,6 +1503,7 @@ def main(args_in: list[str] | None = None) -> None:
    if args.dump:
        do_dump_model(model_plus)
        return
+
    endianess = gguf.GGUFEndian.LITTLE
    if args.big_endian:
        endianess = gguf.GGUFEndian.BIG
@@ -1513,7 +1526,7 @@ def main(args_in: list[str] | None = None) -> None:
        "q8_0": GGMLFileType.MostlyQ8_0,
    }[args.outtype]
 
-    print(f"params = {params}")
+    logger.info(f"params = {params}")
 
    model_parent_path = model_plus.paths[0].parent
    vocab_path = Path(args.vocab_dir or args.model or model_parent_path)
@@ -1528,15 +1541,14 @@ def main(args_in: list[str] | None = None) -> None:
        outfile = args.outfile
        OutputFile.write_vocab_only(outfile, params, vocab, special_vocab,
                                    endianess=endianess, pad_vocab=args.pad_vocab)
-        print(f"Wrote {outfile}")
+        logger.info(f"Wrote {outfile}")
        return
 
    if model_plus.vocab is not None and args.vocab_dir is None and not args.no_vocab:
        vocab = model_plus.vocab
 
-    print(f"Vocab info: {vocab}")
-    print(f"Special vocab info: {special_vocab}")
-
+    logger.info(f"Vocab info: {vocab}")
+    logger.info(f"Special vocab info: {special_vocab}")
    model = model_plus.model
    model = convert_model_names(model, params, args.skip_unknown)
    ftype = pick_output_type(model, args.outtype)
@@ -1544,11 +1556,11 @@ def main(args_in: list[str] | None = None) -> None:
    outfile = args.outfile or default_outfile(model_plus.paths, ftype)
 
    params.ftype = ftype
-    print(f"Writing {outfile}, format {ftype}")
+    logger.info(f"Writing {outfile}, format {ftype}")
 
    OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab,
                         concurrency=args.concurrency, endianess=endianess, pad_vocab=args.pad_vocab)
-    print(f"Wrote {outfile}")
+    logger.info(f"Wrote {outfile}")
 
 
 if __name__ == '__main__':
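
The net effect of the convert.py changes above: all progress output now flows through the standard logging module, and the new --verbose flag selects the level. A minimal sketch of the resulting behavior (the messages and model path below are illustrative, not from the script):

import logging

# Same pattern as the patched convert.py: one module-level logger,
# configured once in main() based on the CLI flags.
logger = logging.getLogger("convert")

def main(verbose: bool = False, dumping: bool = False) -> None:
    if verbose:
        logging.basicConfig(level=logging.DEBUG)    # --verbose
    elif dumping:
        logging.basicConfig(level=logging.WARNING)  # --dump / --dump-single stay quiet
    else:
        logging.basicConfig(level=logging.INFO)     # default
    logger.info("Loading model file model.safetensors")  # was: print(...)
    logger.debug("Permuting layer 0")                    # only visible with --verbose

if __name__ == "__main__":
    main(verbose=True)

Note that do_dump_model() deliberately keeps print() (with # noqa: NP100 markers), since its dump output is the point of --dump and must not be filtered by the log level.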
bigdl/cpp/gguf-py/gguf/constants.py CHANGED
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import sys
 from enum import Enum, IntEnum, auto
 from typing import Any
 
@@ -72,6 +71,7 @@ class Keys:
 
    class Tokenizer:
        MODEL = "tokenizer.ggml.model"
+        PRE = "tokenizer.ggml.pre"
        LIST = "tokenizer.ggml.tokens"
        TOKEN_TYPE = "tokenizer.ggml.token_type"
        TOKEN_TYPE_COUNT = "tokenizer.ggml.token_type_count"  # for BERT-style token types
@@ -124,6 +124,7 @@ class MODEL_ARCH(IntEnum):
    QWEN2 = auto()
    QWEN2MOE = auto()
    PHI2 = auto()
+    PHI3 = auto()
    PLAMO = auto()
    CODESHELL = auto()
    ORION = auto()
@@ -200,6 +201,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
    MODEL_ARCH.QWEN2: "qwen2",
    MODEL_ARCH.QWEN2MOE: "qwen2moe",
    MODEL_ARCH.PHI2: "phi2",
+    MODEL_ARCH.PHI3: "phi3",
    MODEL_ARCH.PLAMO: "plamo",
    MODEL_ARCH.CODESHELL: "codeshell",
    MODEL_ARCH.ORION: "orion",
@@ -550,6 +552,20 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.FFN_UP,
    ],
+    MODEL_ARCH.PHI3: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
    MODEL_ARCH.CODESHELL: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.POS_EMBD,
@@ -837,8 +853,7 @@ class GGUFValueType(IntEnum):
            return GGUFValueType.INT32
        # TODO: need help with 64-bit types in Python
        else:
-            print("Unknown type:", type(val))
-            sys.exit()
+            raise ValueError(f"Unknown type: {type(val)}")
 
 
 # Note: Does not support GGML_QKK_64
@@ -924,6 +939,7 @@ KEY_SSM_TIME_STEP_RANK = Keys.SSM.TIME_STEP_RANK
 
 # tokenization
 KEY_TOKENIZER_MODEL = Keys.Tokenizer.MODEL
+KEY_TOKENIZER_PRE = Keys.Tokenizer.PRE
 KEY_TOKENIZER_LIST = Keys.Tokenizer.LIST
 KEY_TOKENIZER_TOKEN_TYPE = Keys.Tokenizer.TOKEN_TYPE
 KEY_TOKENIZER_SCORES = Keys.Tokenizer.SCORES
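
Taken together, the constants.py changes register the phi3 architecture and make unsupported key types raise instead of killing the process. A small sketch of how downstream code can rely on both, assuming the bundled gguf package is on the import path:

from gguf.constants import MODEL_ARCH, MODEL_ARCH_NAMES, MODEL_TENSORS, GGUFValueType

# The new phi3 architecture is enumerable like any other
print(MODEL_ARCH_NAMES[MODEL_ARCH.PHI3])    # -> "phi3"
print(len(MODEL_TENSORS[MODEL_ARCH.PHI3]))  # -> 12 tensor kinds

# Unknown value types now raise ValueError instead of calling sys.exit()
try:
    GGUFValueType.get_type(object())  # not a str/bytes/list/float/bool/int
except ValueError as e:
    print(f"caught: {e}")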
bigdl/cpp/gguf-py/gguf/gguf_reader.py CHANGED
@@ -4,6 +4,7 @@
 #
 from __future__ import annotations
 
+import logging
 import os
 from collections import OrderedDict
 from typing import Any, Literal, NamedTuple, TypeVar, Union
@@ -27,6 +28,7 @@ from gguf.constants import (
    GGUFValueType,
 )
 
+logger = logging.getLogger(__name__)
 
 READER_SUPPORTED_VERSIONS = [2, GGUF_VERSION]
 
@@ -139,8 +141,13 @@ class GGUFReader:
 
    def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
        if field.name in self.fields:
-            raise KeyError(f'Duplicate {field.name} already in list at offset {field.offset}')
-        self.fields[field.name] = field
+            # TODO: add option to generate error on duplicate keys
+            # raise KeyError(f'Duplicate {field.name} already in list at offset {field.offset}')
+
+            logger.warning(f'Duplicate key {field.name} at offset {field.offset}')
+            self.fields[field.name + '_{}'.format(field.offset)] = field
+        else:
+            self.fields[field.name] = field
        return 0 if skip_sum else sum(int(part.nbytes) for part in field.parts)
 
    def _get_str(self, offset: int) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
@@ -234,8 +241,14 @@ class GGUFReader:
 
    def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
        tensors = []
+        tensor_names = set() # keep track of name to prevent duplicated tensors
        for field in fields:
            _name_len, name_data, _n_dims, dims, raw_dtype, offset_tensor = field.parts
+            # check if there's any tensor having same name already in the list
+            tensor_name = str(bytes(name_data), encoding = 'utf-8')
+            if tensor_name in tensor_names:
+                raise ValueError(f'Found duplicated tensor with name {tensor_name}')
+            tensor_names.add(tensor_name)
            ggml_type = GGMLQuantizationType(raw_dtype[0])
            n_elems = np.prod(dims)
            block_size, type_size = GGML_QUANT_SIZES[ggml_type]
@@ -267,7 +280,7 @@ class GGUFReader:
                item_count = n_bytes
                item_type = np.uint8
            tensors.append(ReaderTensor(
-                name = str(bytes(name_data), encoding = 'utf-8'),
+                name = tensor_name,
                tensor_type = ggml_type,
                shape = dims,
                n_elements = n_elems,
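
With these reader changes, a duplicated metadata key is renamed to '<key>_<offset>' and logged instead of aborting the load, while a duplicated tensor name still fails fast. A sketch of reading a file under these rules (the file name is hypothetical):

import logging
from gguf.gguf_reader import GGUFReader

logging.basicConfig(level=logging.WARNING)  # surface the new duplicate-key warnings

reader = GGUFReader("model.gguf")  # hypothetical file
for key in reader.fields:
    # A duplicated key would appear here as e.g. "tokenizer.ggml.tokens_1234"
    print(key)
# A file containing two tensors with the same name raises ValueError during load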
bigdl/cpp/gguf-py/gguf/gguf_writer.py CHANGED
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import logging
 import os
 import shutil
 import struct
@@ -24,6 +25,8 @@ from .constants import (
    TokenType,
 )
 
+logger = logging.getLogger(__name__)
+
 
 class WriterState(Enum):
    EMPTY = auto()
@@ -63,10 +66,11 @@ class GGUFWriter:
        self.kv_data_count = 0
        self.ti_data = bytearray()
        self.ti_data_count = 0
+        self.ti_names = set()
        self.use_temp_file = use_temp_file
        self.temp_file = None
        self.tensors = []
-        print("gguf: This GGUF file is for {0} Endian only".format(
+        logger.info("gguf: This GGUF file is for {0} Endian only".format(
            "Big" if self.endianess == GGUFEndian.BIG else "Little",
        ))
        self.state = WriterState.EMPTY
@@ -197,6 +201,10 @@ class GGUFWriter:
        if self.state is not WriterState.EMPTY:
            raise ValueError(f'Expected output file to be empty, got {self.state}')
 
+        if name in self.ti_names:
+            raise ValueError(f'Duplicated tensor name {name}')
+        self.ti_names.add(name)
+
        encoded_name = name.encode("utf8")
        self.ti_data += self._pack("Q", len(encoded_name))
        self.ti_data += encoded_name
@@ -422,6 +430,9 @@ class GGUFWriter:
    def add_tokenizer_model(self, model: str) -> None:
        self.add_string(Keys.Tokenizer.MODEL, model)
 
+    def add_tokenizer_pre(self, pre: str) -> None:
+        self.add_string(Keys.Tokenizer.PRE, pre)
+
    def add_token_list(self, tokens: Sequence[str] | Sequence[bytes] | Sequence[bytearray]) -> None:
        self.add_array(Keys.Tokenizer.LIST, tokens)
bigdl/cpp/gguf-py/gguf/tensor_mapping.py CHANGED
@@ -117,6 +117,7 @@ class TensorNameMap:
            "h.{bid}.attn.c_attn", # gpt2
            "transformer.h.{bid}.mixer.Wqkv", # phi2
            "encoder.layers.{bid}.attn.Wqkv", # nomic-bert
+            "model.layers.{bid}.self_attn.qkv_proj" # phi3
        ),
 
        # Attention query
@@ -234,6 +235,7 @@ class TensorNameMap:
            "h.{bid}.mlp.c_fc", # gpt2
            "transformer.h.{bid}.mlp.fc1", # phi2
            "model.layers.{bid}.mlp.fc1", # phi2
+            "model.layers.{bid}.mlp.gate_up_proj", # phi3
            "model.layers.layers.{bid}.mlp.up_proj", # plamo
            "model.layers.{bid}.feed_forward.w3", # internlm2
            "encoder.layers.{bid}.mlp.fc11", # nomic-bert
bigdl/cpp/gguf-py/gguf/vocab.py CHANGED
@@ -1,13 +1,15 @@
 from __future__ import annotations
 
+import logging
 import json
 import os
-import sys
 from pathlib import Path
 from typing import Any, Callable
 
 from .gguf_writer import GGUFWriter
 
+logger = logging.getLogger(__name__)
+
 
 class SpecialVocab:
    merges: list[str]
@@ -40,38 +42,29 @@ class SpecialVocab:
    def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None:
        if self.merges:
            if not quiet:
-                print(f'gguf: Adding {len(self.merges)} merge(s).')
+                logger.info(f'Adding {len(self.merges)} merge(s).')
            gw.add_token_merges(self.merges)
        elif self.load_merges:
-            print(
-                'gguf: WARNING: Adding merges requested but no merges found, output may be non-functional.',
-                file = sys.stderr,
-            )
+            logger.warning('Adding merges requested but no merges found, output may be non-functional.')
        for typ, tokid in self.special_token_ids.items():
            id_handler: Callable[[int], None] | None = getattr(gw, f'add_{typ}_token_id', None)
            if id_handler is None:
-                print(
-                    f'gguf: WARNING: No handler for special token type {typ} with id {tokid} - skipping',
-                    file = sys.stderr,
-                )
+                logger.warning(f'No handler for special token type {typ} with id {tokid} - skipping')
                continue
            if not quiet:
-                print(f'gguf: Setting special token type {typ} to {tokid}')
+                logger.info(f'Setting special token type {typ} to {tokid}')
            id_handler(tokid)
        for typ, value in self.add_special_token.items():
            add_handler: Callable[[bool], None] | None = getattr(gw, f'add_add_{typ}_token', None)
            if add_handler is None:
-                print(
-                    f'gguf: WARNING: No handler for add_{typ}_token with value {value} - skipping',
-                    file = sys.stderr,
-                )
+                logger.warning(f'No handler for add_{typ}_token with value {value} - skipping')
                continue
            if not quiet:
-                print(f'gguf: Setting add_{typ}_token to {value}')
+                logger.info(f'Setting add_{typ}_token to {value}')
            add_handler(value)
        if self.chat_template is not None:
            if not quiet:
-                print(f'gguf: Setting chat_template to {self.chat_template}')
+                logger.info(f'Setting chat_template to {self.chat_template}')
            gw.add_chat_template(self.chat_template)
 
    def _load(self, path: Path) -> None:
@@ -99,10 +92,7 @@ class SpecialVocab:
                continue
            parts = line.split(None, 3)
            if len(parts) != 2:
-                print(
-                    f'gguf: WARNING: {merges_file.name}: Line {line_num}: Entry malformed, ignoring',
-                    file = sys.stderr,
-                )
+                logger.warning(f'{merges_file.name}: Line {line_num}: Entry malformed, ignoring')
                continue
            merges.append(f'{parts[0]} {parts[1]}')
        self.merges = merges
@@ -118,10 +108,7 @@ class SpecialVocab:
                return
            self.special_token_ids[typ] = tid
            return
-        print(
-            f'gguf: WARNING: Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping',
-            file = sys.stderr,
-        )
+        logger.warning(f'Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping')
 
    def _try_load_from_tokenizer_json(self, path: Path) -> bool:
        tokenizer_file = path / 'tokenizer.json'
@@ -144,10 +131,7 @@ class SpecialVocab:
        if chat_template is None or isinstance(chat_template, (str, list)):
            self.chat_template = chat_template
        else:
-            print(
-                f'gguf: WARNING: Bad type for chat_template field in {tokenizer_config_file!r} - ignoring',
-                file = sys.stderr
-            )
+            logger.warning(f'Bad type for chat_template field in {tokenizer_config_file!r} - ignoring')
        for typ in self.special_token_types:
            add_entry = tokenizer_config.get(f'add_{typ}_token')
            if isinstance(add_entry, bool):
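
Since SpecialVocab now reports through logging rather than writing to stderr directly, callers only see its warnings once logging is configured. A brief sketch under the assumption that the model directory exists and the gguf package is importable:

import logging
from pathlib import Path
from gguf.vocab import SpecialVocab

logging.basicConfig(level=logging.INFO)

sv = SpecialVocab(Path("my-model-dir"), load_merges=True)  # hypothetical directory
# Messages such as "Adding merges requested but no merges found, output may be
# non-functional." now go through logger.warning() instead of print(..., file=sys.stderr),
# so a caller can silence or redirect them with standard logging configuration.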
bigdl/cpp/libs (binary files) CHANGED
All 36 executables and DLLs under bigdl/cpp/libs (items 8-43 in the list above) were rebuilt in this release; binary files have no textual diff.
{bigdl_core_cpp-2.5.0b20240507.data → bigdl_core_cpp-2.5.0b20240509.data}/scripts/init-llama-cpp.bat CHANGED
@@ -15,4 +15,5 @@ for %%f in (*) do (
 popd
 
 copy "%cpp_dir%\convert.py" .
+copy "%cpp_dir%\convert-hf-to-gguf.py" .
 xcopy /E /I "%cpp_dir%\gguf-py\" .\gguf-py
{bigdl_core_cpp-2.5.0b20240507.dist-info → bigdl_core_cpp-2.5.0b20240509.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: bigdl-core-cpp
-Version: 2.5.0b20240507
+Version: 2.5.0b20240509
 Summary: Large Language Model Develop Toolkit
 Author: BigDL Authors
 License: Apache License, Version 2.0
bigdl_core_cpp-2.5.0b20240509.dist-info/RECORD ADDED
@@ -0,0 +1,55 @@
+bigdl/cpp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bigdl/cpp/convert-hf-to-gguf.py,sha256=67U2K3ix3i1lps7L57-qiTVu5-VvJQbAXcan4aU0KT4,146672
+bigdl/cpp/convert.py,sha256=OPDsmbVairx1s6ftM-G8MhUpPpEwvF48yS8HhOB7_j4,65023
+bigdl/cpp/gguf-py/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bigdl/cpp/gguf-py/gguf/__init__.py,sha256=2eyRnO5rS0WNuzL_N159IRA66ybYkisoSETMSM-tGBQ,135
+bigdl/cpp/gguf-py/gguf/constants.py,sha256=ovAbNrRVlAchqxDpHU3Cey24Kcq6pEmFTmOu_ZZ71ng,32165
+bigdl/cpp/gguf-py/gguf/gguf.py,sha256=QpLc-xU055W2d7CEFvJp2gLIfGO63bdM24ZndZCH6rw,493
+bigdl/cpp/gguf-py/gguf/gguf_reader.py,sha256=e-yzOSzsKjOfaCzXQ8QsJjBIypdqFdI5NO9tCS3VUZk,12081
+bigdl/cpp/gguf-py/gguf/gguf_writer.py,sha256=TM3UeO9d7Y3_uB3GikB4iqZt0uJw7OydijnUEoiFYAA,20496
+bigdl/cpp/gguf-py/gguf/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bigdl/cpp/gguf-py/gguf/tensor_mapping.py,sha256=CxeOv5ncOICvskNibM1AFs24QUI6N3n_trU0Dvy8wu8,22290
+bigdl/cpp/gguf-py/gguf/vocab.py,sha256=YiWxOW4ta0R3OI2CWeF3o7RIvJA6O0jQ9fxHnartxvU,6955
+bigdl/cpp/libs/baby-llama.exe,sha256=TYHwbN5ItupRTTDrEIMOWusL7Icx2h5OE5zqy3o-JTU,198144
+bigdl/cpp/libs/batched-bench.exe,sha256=xgNV70OpxqTECxtu5g-qrmQVpU6O9yemc2ooJXmiSmc,204800
+bigdl/cpp/libs/batched.exe,sha256=Dd1xOTwFMxqkc6iIX6fUq4B14LlC6TedJweLHJNv7jc,207360
+bigdl/cpp/libs/beam-search.exe,sha256=cOLg89-ljHrM9m3ihbyoHm_UrLLWilOunARWqEmhuws,212480
+bigdl/cpp/libs/benchmark.exe,sha256=xngu4Y79hy7xLyHCi5OBRww9MNba1eKXp8Iu2rguB9I,74240
+bigdl/cpp/libs/convert-llama2c-to-ggml.exe,sha256=BxWBBn9FGWMjmatL10VpKEUqk-N0X6vycOjhf0h1De4,126464
+bigdl/cpp/libs/embedding.exe,sha256=X8ZFVeMTyPzQRWlgMvhNarTkuw2kChpMqzZUJJd-b_4,628224
+bigdl/cpp/libs/export-lora.exe,sha256=OmaG5_dKlEwTwSIMWbk5vqIHXHREgx9wrl4bkQB6fkU,90624
+bigdl/cpp/libs/finetune.exe,sha256=I-Jzm47hAgh-4wj4odl-racdisoMHdmDfZGRFvmcrbE,301056
+bigdl/cpp/libs/ggml_shared.dll,sha256=AHhpeWs5Ujj9CQym3T3A2NGZTzlK33oMbiooYXSb4Uk,4128768
+bigdl/cpp/libs/gguf.exe,sha256=Yn3Sj3owMpc7tIFBnHfEOFsMKqdaqBG6rQodzoh7xws,3666432
+bigdl/cpp/libs/gritlm.exe,sha256=aaIMRhCwxWjU_TUnGGOj-CjdX1mYj-uphmS8VgBiXdc,623104
+bigdl/cpp/libs/imatrix.exe,sha256=2lio_YuKkKk4JRerfFnyw9J4oF2Dj1sfCxRdoSZrtCY,660480
+bigdl/cpp/libs/infill.exe,sha256=9d94vaePAPDo30qNOEtjOAN-wIYgjoNzQS5lMwV608s,727040
+bigdl/cpp/libs/llama-bench.exe,sha256=IU1R9-Rg8DsgeqwcNrMlxpy6S1YRvCK-sB8O8kIL_bM,330752
+bigdl/cpp/libs/llama.dll,sha256=x1wfQ--R13ItNTzDEPQvT4XfAWGHYtubdwg1Vsi-tnE,5621760
+bigdl/cpp/libs/llava-cli.exe,sha256=gqn44wcUJovJLGPY9h-YA9GdBt8oPgxO6Z9f2zGmTuk,909312
+bigdl/cpp/libs/llava_shared.dll,sha256=nsvJagmGiFSBLM9jYbgz18lx5yywMHQ8GwVzkHt4v2E,4397568
+bigdl/cpp/libs/lookahead.exe,sha256=7LAxNgNPJpgwRn0nznb9rS6zj9pvNxjeXAcrn97kF_k,668672
+bigdl/cpp/libs/lookup.exe,sha256=LwaoWrSGy72IX643LdVejPL2KOs22xN73UemSZE5p2M,702976
+bigdl/cpp/libs/ls-sycl-device.exe,sha256=HLfkWKuQdoHwyptDqBPKnvMgN8nOSkltmIl9BAUT39w,10240
+bigdl/cpp/libs/main.exe,sha256=pAPcorruM0JlqiaPZu0QrvlrCMTuJlAFxs-6qLyl2Hs,758784
+bigdl/cpp/libs/ollama.exe,sha256=np5moHCnQ1CS6llHIRQ34rKG6Rgb2ELbHeoVMhZR1s4,63375573
+bigdl/cpp/libs/parallel.exe,sha256=-vkdyvjs7V-wa6JJIb2J0TYu5Un998Ym7oBWF6WN8E0,686080
+bigdl/cpp/libs/passkey.exe,sha256=nj__p65IvTwjz5NXuOefBqbLLe2pDuJEs89oZv7azHk,214528
+bigdl/cpp/libs/perplexity.exe,sha256=KU4W2PZyRkZbAOqxf4HoO0yqzo9FiCVoECroumKau0c,774656
+bigdl/cpp/libs/q8dot.exe,sha256=fLTgWEY2l7hvdChCwGIAJ4V_PYSiAs06Wfq3j0nidEQ,53760
+bigdl/cpp/libs/quantize-stats.exe,sha256=rEXDH1ZGI4DKZGWeZqr_YhBODpPwPZUpVQG9NejHcKA,130560
+bigdl/cpp/libs/quantize.exe,sha256=xIep7s309ZqV5in6UytI3pyeE11fDq4PJoeo2BHkdB0,233984
+bigdl/cpp/libs/save-load-state.exe,sha256=dPDBy-390FfjJdRNyT3wDCVO-AOQR-cc-qQ3lRWilV0,621056
+bigdl/cpp/libs/server.exe,sha256=vL9-altaO3mkhbyJDwpHXQliG9HJ5TFnjeUxEyMqUvc,1682432
+bigdl/cpp/libs/simple.exe,sha256=ABlsMXwln3t8t1oBXQBStYLH_pfjoCqnwQ1J4Bkcogg,200704
+bigdl/cpp/libs/speculative.exe,sha256=LpQQP8HEjpIYwff9-tQelhkN394k0iiYOGO2LagI0Qc,694784
+bigdl/cpp/libs/tokenize.exe,sha256=O99zmZEFlHYcCx0eRxWlqEY-JnOxPe-uRzDLOqzunwU,180224
+bigdl/cpp/libs/train-text-from-scratch.exe,sha256=RdDAH0gHsXpOaIWfvQJuKi_-E_HPcJctbssMbOSalOw,281600
+bigdl/cpp/libs/vdot.exe,sha256=qRe_Q0hVWNMrE0wYOWrqnT-gkb_JJXvtFrY87FV1kK4,56320
+bigdl_core_cpp-2.5.0b20240509.data/scripts/init-llama-cpp.bat,sha256=13AgSYRyzk6hlSz11dPyCMJeSoW9z8PZ9l85E_5GWxs,528
+bigdl_core_cpp-2.5.0b20240509.data/scripts/init-llama-cpp.ps1,sha256=JFOylLxO4MKpllHhdbPuJ1xHi9azxDpzdJns8JtZpkU,501
+bigdl_core_cpp-2.5.0b20240509.data/scripts/init-ollama.bat,sha256=OVX62Q7KH67z6rQrgQUwEc19E3eiAH-8HAIycDmySYc,350
+bigdl_core_cpp-2.5.0b20240509.dist-info/METADATA,sha256=9iisfcxQYJKB5XgFheM8-6Ki-tcW9s0SvCOji54qi2Y,668
+bigdl_core_cpp-2.5.0b20240509.dist-info/WHEEL,sha256=at4xwl6JdXdkZHxdo5ixTwJ7ENtVftSy2wqmsdmo_4U,98
+bigdl_core_cpp-2.5.0b20240509.dist-info/top_level.txt,sha256=iGuLfZARD_qANcIMfy0tbbrC3EtCg6BSiH8icc3dLWs,6
+bigdl_core_cpp-2.5.0b20240509.dist-info/RECORD,,