bigdl-core-cpp 2.5.0b20240508__py3-none-win_amd64.whl → 2.5.0b20240510__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bigdl/cpp/convert-hf-to-gguf.py +3177 -0
- bigdl/cpp/convert.py +36 -24
- bigdl/cpp/gguf-py/gguf/constants.py +19 -3
- bigdl/cpp/gguf-py/gguf/gguf_reader.py +16 -3
- bigdl/cpp/gguf-py/gguf/gguf_writer.py +12 -1
- bigdl/cpp/gguf-py/gguf/tensor_mapping.py +2 -0
- bigdl/cpp/gguf-py/gguf/vocab.py +13 -29
- bigdl/cpp/libs/baby-llama.exe +0 -0
- bigdl/cpp/libs/batched-bench.exe +0 -0
- bigdl/cpp/libs/batched.exe +0 -0
- bigdl/cpp/libs/beam-search.exe +0 -0
- bigdl/cpp/libs/benchmark.exe +0 -0
- bigdl/cpp/libs/convert-llama2c-to-ggml.exe +0 -0
- bigdl/cpp/libs/embedding.exe +0 -0
- bigdl/cpp/libs/export-lora.exe +0 -0
- bigdl/cpp/libs/finetune.exe +0 -0
- bigdl/cpp/libs/ggml_shared.dll +0 -0
- bigdl/cpp/libs/gguf.exe +0 -0
- bigdl/cpp/libs/gritlm.exe +0 -0
- bigdl/cpp/libs/imatrix.exe +0 -0
- bigdl/cpp/libs/infill.exe +0 -0
- bigdl/cpp/libs/llama-bench.exe +0 -0
- bigdl/cpp/libs/llama.dll +0 -0
- bigdl/cpp/libs/llava-cli.exe +0 -0
- bigdl/cpp/libs/llava_shared.dll +0 -0
- bigdl/cpp/libs/lookahead.exe +0 -0
- bigdl/cpp/libs/lookup.exe +0 -0
- bigdl/cpp/libs/ls-sycl-device.exe +0 -0
- bigdl/cpp/libs/main.exe +0 -0
- bigdl/cpp/libs/ollama.exe +0 -0
- bigdl/cpp/libs/parallel.exe +0 -0
- bigdl/cpp/libs/passkey.exe +0 -0
- bigdl/cpp/libs/perplexity.exe +0 -0
- bigdl/cpp/libs/q8dot.exe +0 -0
- bigdl/cpp/libs/quantize-stats.exe +0 -0
- bigdl/cpp/libs/quantize.exe +0 -0
- bigdl/cpp/libs/save-load-state.exe +0 -0
- bigdl/cpp/libs/server.exe +0 -0
- bigdl/cpp/libs/simple.exe +0 -0
- bigdl/cpp/libs/speculative.exe +0 -0
- bigdl/cpp/libs/tokenize.exe +0 -0
- bigdl/cpp/libs/train-text-from-scratch.exe +0 -0
- bigdl/cpp/libs/vdot.exe +0 -0
- {bigdl_core_cpp-2.5.0b20240508.data → bigdl_core_cpp-2.5.0b20240510.data}/scripts/init-llama-cpp.bat +1 -0
- {bigdl_core_cpp-2.5.0b20240508.dist-info → bigdl_core_cpp-2.5.0b20240510.dist-info}/METADATA +1 -1
- bigdl_core_cpp-2.5.0b20240510.dist-info/RECORD +55 -0
- bigdl_core_cpp-2.5.0b20240508.dist-info/RECORD +0 -54
- {bigdl_core_cpp-2.5.0b20240508.data → bigdl_core_cpp-2.5.0b20240510.data}/scripts/init-llama-cpp.ps1 +0 -0
- {bigdl_core_cpp-2.5.0b20240508.data → bigdl_core_cpp-2.5.0b20240510.data}/scripts/init-ollama.bat +0 -0
- {bigdl_core_cpp-2.5.0b20240508.dist-info → bigdl_core_cpp-2.5.0b20240510.dist-info}/WHEEL +0 -0
- {bigdl_core_cpp-2.5.0b20240508.dist-info → bigdl_core_cpp-2.5.0b20240510.dist-info}/top_level.txt +0 -0
bigdl/cpp/convert.py
CHANGED
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 from __future__ import annotations
 
+import logging
 import argparse
 import concurrent.futures
 import enum
@@ -35,6 +36,8 @@ import gguf
 if TYPE_CHECKING:
     from typing_extensions import Self, TypeAlias
 
+logger = logging.getLogger("convert")
+
 if hasattr(faulthandler, 'register') and hasattr(signal, 'SIGUSR1'):
     faulthandler.register(signal.SIGUSR1)
 
@@ -643,7 +646,6 @@ class LlamaHfVocab(Vocab):
 
 
 def permute(weights: NDArray, n_head: int, n_head_kv: int) -> NDArray:
-    # print( "permute debug " + str(weights.shape[0]) + " x " + str(weights.shape[1]) + " nhead " + str(n_head) + " nheadkv " + str(n_kv_head) )
     if n_head_kv is not None and n_head != n_head_kv:
         n_head = n_head_kv
     return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
@@ -1033,12 +1035,12 @@ def check_vocab_size(params: Params, vocab: BaseVocab, pad_vocab: bool = False) -> None:
 
     # Check for a vocab size mismatch
     if params.n_vocab == vocab.vocab_size:
-        print("Ignoring added_tokens.json since model matches vocab size without it.")
+        logger.warning("Ignoring added_tokens.json since model matches vocab size without it.")
         return
 
     if pad_vocab and params.n_vocab > vocab.vocab_size:
         pad_count = params.n_vocab - vocab.vocab_size
-        print(
+        logger.debug(
             f"Padding vocab with {pad_count} token(s) - <dummy00001> through <dummy{pad_count:05}>"
         )
         for i in range(1, pad_count + 1):
@@ -1166,7 +1168,7 @@ class OutputFile:
         elapsed = time.time() - start
         size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
         padi = len(str(len(model)))
-        print(
+        logger.info(
             f"[{i + 1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}"
         )
         self.gguf.write_tensor_data(ndarray)
@@ -1281,12 +1283,12 @@ def convert_model_names(model: LazyModel, params: Params, skip_unknown: bool) -> LazyModel:
     # HF models permut or pack some of the tensors, so we need to undo that
     for i in itertools.count():
         if f"model.layers.{i}.self_attn.q_proj.weight" in model:
-            print(f"Permuting layer {i}")
+            logger.debug(f"Permuting layer {i}")
             tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head, params.n_head)
             tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head, params.n_head_kv)
             # tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
         elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
-            print(f"Unpacking and permuting layer {i}")
+            logger.debug(f"Unpacking and permuting layer {i}")
             tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head, params.n_head)
             tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head, params.n_head_kv)
             tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = part_lazy (model[f"model.layers.{i}.self_attn.W_pack.weight"], 2)
@@ -1299,15 +1301,15 @@ def convert_model_names(model: LazyModel, params: Params, skip_unknown: bool) -> LazyModel:
         tensor_type, name_new = tmap.get_type_and_name(name, try_suffixes = (".weight", ".bias")) or (None, None)
         if name_new is None:
             if skip_unknown:
-                print(f"Unexpected tensor name: {name} - skipping")
+                logger.warning(f"Unexpected tensor name: {name} - skipping")
                 continue
             raise ValueError(f"Unexpected tensor name: {name}. Use --skip-unknown to ignore it (e.g. LLaVA)")
 
         if tensor_type in should_skip:
-            print(f"skipping tensor {name_new}")
+            logger.debug(f"skipping tensor {name_new}")
             continue
 
-        print(f"{name:48s} -> {name_new:40s} | {lazy_tensor.data_type.name:6s} | {lazy_tensor.shape}")
+        logger.debug(f"{name:48s} -> {name_new:40s} | {lazy_tensor.data_type.name:6s} | {lazy_tensor.shape}")
         out[name_new] = lazy_tensor
 
     return out
@@ -1372,7 +1374,7 @@ def load_some_model(path: Path) -> ModelPlus:
     paths = find_multifile_paths(path)
     models_plus: list[ModelPlus] = []
     for path in paths:
-        print(f"Loading model file {path}")
+        logger.info(f"Loading model file {path}")
         models_plus.append(lazy_load_file(path))
 
     model_plus = merge_multifile_models(models_plus)
@@ -1413,7 +1415,7 @@ class VocabFactory:
         else:
             raise FileNotFoundError(f"Could not find a tokenizer matching any of {vocab_types}")
 
-        print(f"Loaded vocab file {vocab.fname_tokenizer!r}, type {vocab.name!r}")
+        logger.info(f"Loaded vocab file {vocab.fname_tokenizer!r}, type {vocab.name!r}")
         return vocab
 
     def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) -> tuple[BaseVocab, gguf.SpecialVocab]:
@@ -1438,19 +1440,19 @@ def default_outfile(model_paths: list[Path], file_type: GGMLFileType) -> Path:
     }[file_type]
     ret = model_paths[0].parent / f"ggml-model-{namestr}.gguf"
     if ret in model_paths:
-        sys.stderr.write(
+        logger.error(
             f"Error: Default output path ({ret}) would overwrite the input. "
-            "Please explicitly specify a path using --outfile.\n")
+            "Please explicitly specify a path using --outfile.")
         sys.exit(1)
     return ret
 
 
 def do_dump_model(model_plus: ModelPlus) -> None:
-    print(f"model_plus.paths = {model_plus.paths!r}")
-    print(f"model_plus.format = {model_plus.format!r}")
-    print(f"model_plus.vocab = {model_plus.vocab!r}")
+    print(f"model_plus.paths = {model_plus.paths!r}")  # noqa: NP100
+    print(f"model_plus.format = {model_plus.format!r}")  # noqa: NP100
+    print(f"model_plus.vocab = {model_plus.vocab!r}")  # noqa: NP100
     for name, lazy_tensor in model_plus.model.items():
-        print(f"{name}: shape={lazy_tensor.shape} type={lazy_tensor.data_type}; {lazy_tensor.description}")
+        print(f"{name}: shape={lazy_tensor.shape} type={lazy_tensor.data_type}; {lazy_tensor.description}")  # noqa: NP100
 
 
 def main(args_in: list[str] | None = None) -> None:
@@ -1473,8 +1475,18 @@ def main(args_in: list[str] | None = None) -> None:
     parser.add_argument("--big-endian", action="store_true", help="model is executed on big endian machine")
     parser.add_argument("--pad-vocab", action="store_true", help="add pad tokens when model vocab expects more than tokenizer metadata provides")
     parser.add_argument("--skip-unknown", action="store_true", help="skip unknown tensor names instead of failing")
+    parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
 
     args = parser.parse_args(args_in)
+
+    if args.verbose:
+        logging.basicConfig(level=logging.DEBUG)
+    elif args.dump_single or args.dump:
+        # Avoid printing anything besides the dump output
+        logging.basicConfig(level=logging.WARNING)
+    else:
+        logging.basicConfig(level=logging.INFO)
+
     if args.no_vocab and args.vocab_only:
         raise ValueError("--vocab-only does not make sense with --no-vocab")
@@ -1491,6 +1503,7 @@ def main(args_in: list[str] | None = None) -> None:
     if args.dump:
         do_dump_model(model_plus)
         return
+
     endianess = gguf.GGUFEndian.LITTLE
     if args.big_endian:
         endianess = gguf.GGUFEndian.BIG
@@ -1513,7 +1526,7 @@ def main(args_in: list[str] | None = None) -> None:
         "q8_0": GGMLFileType.MostlyQ8_0,
     }[args.outtype]
 
-    print(f"params = {params}")
+    logger.info(f"params = {params}")
 
     model_parent_path = model_plus.paths[0].parent
     vocab_path = Path(args.vocab_dir or args.model or model_parent_path)
@@ -1528,15 +1541,14 @@ def main(args_in: list[str] | None = None) -> None:
         outfile = args.outfile
         OutputFile.write_vocab_only(outfile, params, vocab, special_vocab,
                                     endianess=endianess, pad_vocab=args.pad_vocab)
-        print(f"Wrote {outfile}")
+        logger.info(f"Wrote {outfile}")
         return
 
     if model_plus.vocab is not None and args.vocab_dir is None and not args.no_vocab:
         vocab = model_plus.vocab
 
-    print(f"Vocab info: {vocab}")
-    print(f"Special vocab info: {special_vocab}")
-
+    logger.info(f"Vocab info: {vocab}")
+    logger.info(f"Special vocab info: {special_vocab}")
     model = model_plus.model
     model = convert_model_names(model, params, args.skip_unknown)
     ftype = pick_output_type(model, args.outtype)
@@ -1544,11 +1556,11 @@ def main(args_in: list[str] | None = None) -> None:
     outfile = args.outfile or default_outfile(model_plus.paths, ftype)
 
     params.ftype = ftype
-    print(f"Writing {outfile}, format {ftype}")
+    logger.info(f"Writing {outfile}, format {ftype}")
 
     OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab,
                          concurrency=args.concurrency, endianess=endianess, pad_vocab=args.pad_vocab)
-    print(f"Wrote {outfile}")
+    logger.info(f"Wrote {outfile}")
 
 
 if __name__ == '__main__':
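The convert.py changes above replace ad-hoc print() output with a named logger whose level is chosen once at startup. A minimal standalone sketch of that pattern, not the full script (the flag name and messages mirror the diff):

import argparse
import logging

logger = logging.getLogger("convert")


def main(args_in: list[str] | None = None) -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
    args = parser.parse_args(args_in)

    # One basicConfig call replaces the scattered print()s; --verbose
    # surfaces the per-tensor logger.debug() traces.
    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    logger.info("Loading model file ...")  # was: print(f"Loading model file {path}")
    logger.debug("Permuting layer 0")      # only visible with --verbose


if __name__ == "__main__":
    main()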
bigdl/cpp/gguf-py/gguf/constants.py
CHANGED
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import sys
 from enum import Enum, IntEnum, auto
 from typing import Any
 
@@ -72,6 +71,7 @@ class Keys:
 
     class Tokenizer:
         MODEL            = "tokenizer.ggml.model"
+        PRE              = "tokenizer.ggml.pre"
         LIST             = "tokenizer.ggml.tokens"
         TOKEN_TYPE       = "tokenizer.ggml.token_type"
         TOKEN_TYPE_COUNT = "tokenizer.ggml.token_type_count"  # for BERT-style token types
@@ -124,6 +124,7 @@ class MODEL_ARCH(IntEnum):
     QWEN2     = auto()
     QWEN2MOE  = auto()
     PHI2      = auto()
+    PHI3      = auto()
     PLAMO     = auto()
     CODESHELL = auto()
     ORION     = auto()
@@ -200,6 +201,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.QWEN2:     "qwen2",
     MODEL_ARCH.QWEN2MOE:  "qwen2moe",
     MODEL_ARCH.PHI2:      "phi2",
+    MODEL_ARCH.PHI3:      "phi3",
     MODEL_ARCH.PLAMO:     "plamo",
     MODEL_ARCH.CODESHELL: "codeshell",
     MODEL_ARCH.ORION:     "orion",
@@ -550,6 +552,20 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.PHI3: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
     MODEL_ARCH.CODESHELL: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.POS_EMBD,
@@ -837,8 +853,7 @@ class GGUFValueType(IntEnum):
             return GGUFValueType.INT32
         # TODO: need help with 64-bit types in Python
         else:
-            print("Unknown type:", type(val))
-            sys.exit()
+            raise ValueError(f"Unknown type: {type(val)}")
 
 
 # Note: Does not support GGML_QKK_64
@@ -924,6 +939,7 @@ KEY_SSM_TIME_STEP_RANK = Keys.SSM.TIME_STEP_RANK
 
 # tokenization
 KEY_TOKENIZER_MODEL      = Keys.Tokenizer.MODEL
+KEY_TOKENIZER_PRE        = Keys.Tokenizer.PRE
 KEY_TOKENIZER_LIST       = Keys.Tokenizer.LIST
 KEY_TOKENIZER_TOKEN_TYPE = Keys.Tokenizer.TOKEN_TYPE
 KEY_TOKENIZER_SCORES     = Keys.Tokenizer.SCORES
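For gguf-py consumers, the constants.py changes register phi3 as a first-class architecture and reserve a metadata key for the pre-tokenizer name. A small illustrative check, assuming the bundled gguf-py directory is on the import path:

from gguf.constants import MODEL_ARCH, MODEL_ARCH_NAMES, MODEL_TENSORS, Keys

# New in this release: the phi3 architecture and its 12-entry tensor list...
assert MODEL_ARCH_NAMES[MODEL_ARCH.PHI3] == "phi3"
print(len(MODEL_TENSORS[MODEL_ARCH.PHI3]))  # 12

# ...and the pre-tokenizer metadata key.
print(Keys.Tokenizer.PRE)  # tokenizer.ggml.pre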
bigdl/cpp/gguf-py/gguf/gguf_reader.py
CHANGED
@@ -4,6 +4,7 @@
 #
 from __future__ import annotations
 
+import logging
 import os
 from collections import OrderedDict
 from typing import Any, Literal, NamedTuple, TypeVar, Union
@@ -27,6 +28,7 @@ from gguf.constants import (
     GGUFValueType,
 )
 
+logger = logging.getLogger(__name__)
 
 READER_SUPPORTED_VERSIONS = [2, GGUF_VERSION]
 
@@ -139,8 +141,13 @@ class GGUFReader:
 
     def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
         if field.name in self.fields:
-            raise KeyError(f'Duplicate {field.name} already in list at offset {field.offset}')
-        self.fields[field.name] = field
+            # TODO: add option to generate error on duplicate keys
+            # raise KeyError(f'Duplicate {field.name} already in list at offset {field.offset}')
+
+            logger.warning(f'Duplicate key {field.name} at offset {field.offset}')
+            self.fields[field.name + '_{}'.format(field.offset)] = field
+        else:
+            self.fields[field.name] = field
         return 0 if skip_sum else sum(int(part.nbytes) for part in field.parts)
 
     def _get_str(self, offset: int) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
@@ -234,8 +241,14 @@ class GGUFReader:
 
     def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
        tensors = []
+        tensor_names = set() # keep track of name to prevent duplicated tensors
         for field in fields:
             _name_len, name_data, _n_dims, dims, raw_dtype, offset_tensor = field.parts
+            # check if there's any tensor having same name already in the list
+            tensor_name = str(bytes(name_data), encoding = 'utf-8')
+            if tensor_name in tensor_names:
+                raise ValueError(f'Found duplicated tensor with name {tensor_name}')
+            tensor_names.add(tensor_name)
             ggml_type = GGMLQuantizationType(raw_dtype[0])
             n_elems = np.prod(dims)
             block_size, type_size = GGML_QUANT_SIZES[ggml_type]
@@ -267,7 +280,7 @@ class GGUFReader:
                 item_count = n_bytes
                 item_type = np.uint8
             tensors.append(ReaderTensor(
-                name = str(bytes(name_data), encoding = 'utf-8'),
+                name = tensor_name,
                 tensor_type = ggml_type,
                 shape = dims,
                 n_elements = n_elems,
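In practice the reader changes mean a file with a repeated metadata key no longer aborts loading: the second occurrence is stored under a '<name>_<offset>' key and a warning is logged, while a repeated tensor name still raises ValueError. A hedged usage sketch ('model.gguf' is a placeholder path):

import logging
from gguf.gguf_reader import GGUFReader

logging.basicConfig(level=logging.WARNING)  # surfaces the duplicate-key warnings

reader = GGUFReader("model.gguf")  # placeholder path
for name, field in reader.fields.items():
    # a duplicated key would appear here as e.g. 'general.name_1234'
    print(name, field.offset)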
bigdl/cpp/gguf-py/gguf/gguf_writer.py
CHANGED
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import logging
 import os
 import shutil
 import struct
@@ -24,6 +25,8 @@ from .constants import (
     TokenType,
 )
 
+logger = logging.getLogger(__name__)
+
 
 class WriterState(Enum):
     EMPTY = auto()
@@ -63,10 +66,11 @@ class GGUFWriter:
         self.kv_data_count = 0
         self.ti_data = bytearray()
         self.ti_data_count = 0
+        self.ti_names = set()
         self.use_temp_file = use_temp_file
         self.temp_file = None
         self.tensors = []
-        print("gguf: This GGUF file is for {0} Endian only".format(
+        logger.info("gguf: This GGUF file is for {0} Endian only".format(
             "Big" if self.endianess == GGUFEndian.BIG else "Little",
         ))
         self.state = WriterState.EMPTY
@@ -197,6 +201,10 @@ class GGUFWriter:
         if self.state is not WriterState.EMPTY:
             raise ValueError(f'Expected output file to be empty, got {self.state}')
 
+        if name in self.ti_names:
+            raise ValueError(f'Duplicated tensor name {name}')
+        self.ti_names.add(name)
+
         encoded_name = name.encode("utf8")
         self.ti_data += self._pack("Q", len(encoded_name))
         self.ti_data += encoded_name
@@ -422,6 +430,9 @@ class GGUFWriter:
     def add_tokenizer_model(self, model: str) -> None:
         self.add_string(Keys.Tokenizer.MODEL, model)
 
+    def add_tokenizer_pre(self, pre: str) -> None:
+        self.add_string(Keys.Tokenizer.PRE, pre)
+
     def add_token_list(self, tokens: Sequence[str] | Sequence[bytes] | Sequence[bytearray]) -> None:
         self.add_array(Keys.Tokenizer.LIST, tokens)
 
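The writer changes add a guard against registering the same tensor name twice and a helper for the new tokenizer.ggml.pre key. A short sketch of both, with placeholder path and values:

import numpy as np
from gguf.gguf_writer import GGUFWriter

writer = GGUFWriter("out.gguf", "phi3")  # placeholder output path and arch
writer.add_tokenizer_model("gpt2")
writer.add_tokenizer_pre("default")      # new helper: writes tokenizer.ggml.pre

data = np.zeros((8, 8), dtype=np.float32)
writer.add_tensor("token_embd.weight", data)
# A second add_tensor("token_embd.weight", ...) would now raise
# ValueError('Duplicated tensor name token_embd.weight') via ti_names.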
bigdl/cpp/gguf-py/gguf/tensor_mapping.py
CHANGED
@@ -117,6 +117,7 @@ class TensorNameMap:
             "h.{bid}.attn.c_attn",                # gpt2
             "transformer.h.{bid}.mixer.Wqkv",     # phi2
             "encoder.layers.{bid}.attn.Wqkv",     # nomic-bert
+            "model.layers.{bid}.self_attn.qkv_proj" # phi3
         ),
 
         # Attention query
@@ -234,6 +235,7 @@ class TensorNameMap:
             "h.{bid}.mlp.c_fc",                       # gpt2
             "transformer.h.{bid}.mlp.fc1",            # phi2
             "model.layers.{bid}.mlp.fc1",             # phi2
+            "model.layers.{bid}.mlp.gate_up_proj",    # phi3
             "model.layers.layers.{bid}.mlp.up_proj",  # plamo
             "model.layers.{bid}.feed_forward.w3",     # internlm2
             "encoder.layers.{bid}.mlp.fc11",          # nomic-bert
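With the two entries above, the generic tensor-name mapper resolves phi3's fused projections to their GGUF names. An illustrative lookup (the block count is arbitrary here):

from gguf.constants import MODEL_ARCH
from gguf.tensor_mapping import get_tensor_name_map

tmap = get_tensor_name_map(MODEL_ARCH.PHI3, n_blocks=32)
# the new phi3 mapping routes the fused QKV projection to blk.N.attn_qkv
print(tmap.get_name("model.layers.0.self_attn.qkv_proj.weight",
                    try_suffixes=(".weight", ".bias")))  # blk.0.attn_qkv.weight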
bigdl/cpp/gguf-py/gguf/vocab.py
CHANGED
@@ -1,13 +1,15 @@
 from __future__ import annotations
 
+import logging
 import json
 import os
-import sys
 from pathlib import Path
 from typing import Any, Callable
 
 from .gguf_writer import GGUFWriter
 
+logger = logging.getLogger(__name__)
+
 
 class SpecialVocab:
     merges: list[str]
@@ -40,38 +42,29 @@ class SpecialVocab:
     def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None:
         if self.merges:
             if not quiet:
-                print(f'gguf: Adding {len(self.merges)} merge(s).')
+                logger.info(f'Adding {len(self.merges)} merge(s).')
             gw.add_token_merges(self.merges)
         elif self.load_merges:
-            print(
-                'gguf: WARNING: Adding merges requested but no merges found, output may be non-functional.',
-                file = sys.stderr,
-            )
+            logger.warning('Adding merges requested but no merges found, output may be non-functional.')
         for typ, tokid in self.special_token_ids.items():
             id_handler: Callable[[int], None] | None = getattr(gw, f'add_{typ}_token_id', None)
             if id_handler is None:
-                print(
-                    f'gguf: WARNING: No handler for special token type {typ} with id {tokid} - skipping',
-                    file = sys.stderr,
-                )
+                logger.warning(f'No handler for special token type {typ} with id {tokid} - skipping')
                 continue
             if not quiet:
-                print(f'gguf: Setting special token type {typ} to {tokid}')
+                logger.info(f'Setting special token type {typ} to {tokid}')
             id_handler(tokid)
         for typ, value in self.add_special_token.items():
             add_handler: Callable[[bool], None] | None = getattr(gw, f'add_add_{typ}_token', None)
             if add_handler is None:
-                print(
-                    f'gguf: WARNING: No handler for add_{typ}_token with value {value} - skipping',
-                    file = sys.stderr,
-                )
+                logger.warning(f'No handler for add_{typ}_token with value {value} - skipping')
                 continue
             if not quiet:
-                print(f'gguf: Setting add_{typ}_token to {value}')
+                logger.info(f'Setting add_{typ}_token to {value}')
             add_handler(value)
         if self.chat_template is not None:
             if not quiet:
-                print(f'gguf: Setting chat_template to {self.chat_template}')
+                logger.info(f'Setting chat_template to {self.chat_template}')
             gw.add_chat_template(self.chat_template)
 
     def _load(self, path: Path) -> None:
@@ -99,10 +92,7 @@ class SpecialVocab:
                 continue
             parts = line.split(None, 3)
             if len(parts) != 2:
-                print(
-                    f'gguf: WARNING: {merges_file.name}: Line {line_num}: Entry malformed, ignoring',
-                    file = sys.stderr,
-                )
+                logger.warning(f'{merges_file.name}: Line {line_num}: Entry malformed, ignoring')
                 continue
             merges.append(f'{parts[0]} {parts[1]}')
         self.merges = merges
@@ -118,10 +108,7 @@ class SpecialVocab:
                 return
             self.special_token_ids[typ] = tid
             return
-        print(
-            f'gguf: WARNING: Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping',
-            file = sys.stderr,
-        )
+        logger.warning(f'Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping')
 
     def _try_load_from_tokenizer_json(self, path: Path) -> bool:
         tokenizer_file = path / 'tokenizer.json'
@@ -144,10 +131,7 @@ class SpecialVocab:
         if chat_template is None or isinstance(chat_template, (str, list)):
             self.chat_template = chat_template
         else:
-            print(
-                f'gguf: WARNING: Bad type for chat_template field in {tokenizer_config_file!r} - ignoring',
-                file = sys.stderr
-            )
+            logger.warning(f'Bad type for chat_template field in {tokenizer_config_file!r} - ignoring')
         for typ in self.special_token_types:
             add_entry = tokenizer_config.get(f'add_{typ}_token')
             if isinstance(add_entry, bool):
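Because SpecialVocab now reports through the module logger instead of writing to stderr, callers control its verbosity with the standard logging machinery. A minimal sketch (the model directory is a placeholder):

import logging
from gguf.vocab import SpecialVocab

logging.basicConfig(level=logging.INFO)

special_vocab = SpecialVocab("path/to/hf_model_dir", load_merges=True)  # placeholder
# special_vocab.add_to_gguf(writer) now emits 'Adding N merge(s).' via
# logger.info, and its former 'gguf: WARNING: ...' lines via logger.warning.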
bigdl/cpp/libs/baby-llama.exe
CHANGED
Binary file
bigdl/cpp/libs/batched-bench.exe
CHANGED
Binary file
bigdl/cpp/libs/batched.exe
CHANGED
Binary file
bigdl/cpp/libs/beam-search.exe
CHANGED
Binary file
bigdl/cpp/libs/benchmark.exe
CHANGED
Binary file
bigdl/cpp/libs/convert-llama2c-to-ggml.exe
CHANGED
Binary file
bigdl/cpp/libs/embedding.exe
CHANGED
Binary file
bigdl/cpp/libs/export-lora.exe
CHANGED
Binary file
bigdl/cpp/libs/finetune.exe
CHANGED
Binary file
bigdl/cpp/libs/ggml_shared.dll
CHANGED
Binary file
bigdl/cpp/libs/gguf.exe
CHANGED
Binary file
bigdl/cpp/libs/gritlm.exe
CHANGED
Binary file
bigdl/cpp/libs/imatrix.exe
CHANGED
Binary file
bigdl/cpp/libs/infill.exe
CHANGED
Binary file
bigdl/cpp/libs/llama-bench.exe
CHANGED
Binary file
bigdl/cpp/libs/llama.dll
CHANGED
Binary file
bigdl/cpp/libs/llava-cli.exe
CHANGED
Binary file
bigdl/cpp/libs/llava_shared.dll
CHANGED
Binary file
bigdl/cpp/libs/lookahead.exe
CHANGED
Binary file
bigdl/cpp/libs/lookup.exe
CHANGED
Binary file
bigdl/cpp/libs/ls-sycl-device.exe
CHANGED
Binary file
bigdl/cpp/libs/main.exe
CHANGED
Binary file
bigdl/cpp/libs/ollama.exe
CHANGED
Binary file
bigdl/cpp/libs/parallel.exe
CHANGED
Binary file
bigdl/cpp/libs/passkey.exe
CHANGED
Binary file
bigdl/cpp/libs/perplexity.exe
CHANGED
Binary file
bigdl/cpp/libs/q8dot.exe
CHANGED
Binary file
bigdl/cpp/libs/quantize-stats.exe
CHANGED
Binary file
bigdl/cpp/libs/quantize.exe
CHANGED
Binary file
bigdl/cpp/libs/save-load-state.exe
CHANGED
Binary file
bigdl/cpp/libs/server.exe
CHANGED
Binary file
bigdl/cpp/libs/simple.exe
CHANGED
Binary file
bigdl/cpp/libs/speculative.exe
CHANGED
Binary file
bigdl/cpp/libs/tokenize.exe
CHANGED
Binary file
bigdl/cpp/libs/train-text-from-scratch.exe
CHANGED
Binary file
bigdl/cpp/libs/vdot.exe
CHANGED
Binary file
bigdl_core_cpp-2.5.0b20240510.dist-info/RECORD
ADDED
@@ -0,0 +1,55 @@
+bigdl/cpp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bigdl/cpp/convert-hf-to-gguf.py,sha256=67U2K3ix3i1lps7L57-qiTVu5-VvJQbAXcan4aU0KT4,146672
+bigdl/cpp/convert.py,sha256=OPDsmbVairx1s6ftM-G8MhUpPpEwvF48yS8HhOB7_j4,65023
+bigdl/cpp/gguf-py/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bigdl/cpp/gguf-py/gguf/__init__.py,sha256=2eyRnO5rS0WNuzL_N159IRA66ybYkisoSETMSM-tGBQ,135
+bigdl/cpp/gguf-py/gguf/constants.py,sha256=ovAbNrRVlAchqxDpHU3Cey24Kcq6pEmFTmOu_ZZ71ng,32165
+bigdl/cpp/gguf-py/gguf/gguf.py,sha256=QpLc-xU055W2d7CEFvJp2gLIfGO63bdM24ZndZCH6rw,493
+bigdl/cpp/gguf-py/gguf/gguf_reader.py,sha256=e-yzOSzsKjOfaCzXQ8QsJjBIypdqFdI5NO9tCS3VUZk,12081
+bigdl/cpp/gguf-py/gguf/gguf_writer.py,sha256=TM3UeO9d7Y3_uB3GikB4iqZt0uJw7OydijnUEoiFYAA,20496
+bigdl/cpp/gguf-py/gguf/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bigdl/cpp/gguf-py/gguf/tensor_mapping.py,sha256=CxeOv5ncOICvskNibM1AFs24QUI6N3n_trU0Dvy8wu8,22290
+bigdl/cpp/gguf-py/gguf/vocab.py,sha256=YiWxOW4ta0R3OI2CWeF3o7RIvJA6O0jQ9fxHnartxvU,6955
+bigdl/cpp/libs/baby-llama.exe,sha256=iy_KBtnrlYCE9ZG8com7kX_mRZ6M-Ok5_AOpD7TnHI0,198144
+bigdl/cpp/libs/batched-bench.exe,sha256=GxIC_lQLErihen1YzTbNEzJsnzPH2bCE6B093NynnnA,204800
+bigdl/cpp/libs/batched.exe,sha256=HZZ_f893O0dxOs8SsdcFNrh8JzdmY2NzPZ0iUx3Dx7Q,207360
+bigdl/cpp/libs/beam-search.exe,sha256=PLqsCA2LL9OFf7CYgB0RKNYKiatb_DrPbvGp94IqY7A,212480
+bigdl/cpp/libs/benchmark.exe,sha256=FjZzyZkpoIAqGwTtvvxsOw00OuHJPCO15HnGH1IL66I,74240
+bigdl/cpp/libs/convert-llama2c-to-ggml.exe,sha256=xyUHdo4wBGakfbokQQbZ2aOE4m2xaHcqqWqJ4uMfcYA,126464
+bigdl/cpp/libs/embedding.exe,sha256=35h8XlVdx_Sin-fmrjrZV6GjnXmlECYJ0N3p9UqNXsI,628224
+bigdl/cpp/libs/export-lora.exe,sha256=K0coBfqmmau9Ay0-vsyR9jnJh2CDVX92Si0pOFB17VQ,90624
+bigdl/cpp/libs/finetune.exe,sha256=MZGzvkaoD9S1oB4IyI-vGnEAVCWlarKb6JSQ1xXYF5M,301056
+bigdl/cpp/libs/ggml_shared.dll,sha256=Gh5VlAGJXlLONMVBunYQOSqX3qZa0kwuSgxEH0kCpe8,4230656
+bigdl/cpp/libs/gguf.exe,sha256=YZjN8irTL2CxXqE-29SLIl5ZVpPnVZkwEAv0dItlhig,3768832
+bigdl/cpp/libs/gritlm.exe,sha256=aSV_zafse0VeXtzgi9rViBnlN6N4LnxUhjKHNCpujwA,623104
+bigdl/cpp/libs/imatrix.exe,sha256=r96d19DBbYnieJt9TofVBH_HODpmCt6U_EzixUZ56rM,660480
+bigdl/cpp/libs/infill.exe,sha256=empguEM4BLHRqz4cUdRYLNTkC7M_SMTTCkD6qVmGHjA,727040
+bigdl/cpp/libs/llama-bench.exe,sha256=rIvug8l2MEZ1yZrakPPSIG3r_j6tUMC_tbKLzae55E0,330752
+bigdl/cpp/libs/llama.dll,sha256=IAWcUOEDvgdWpw_unCOpA4bweOUbYIDWVXptcKe-uE0,5724672
+bigdl/cpp/libs/llava-cli.exe,sha256=pNpQVy7u9ZCZ0D8wTCyCFlKMNIcl4IwFjYLFqTZR4Y8,909312
+bigdl/cpp/libs/llava_shared.dll,sha256=KKfUYGzLGJEStX6d7lGwXCgeV0DlyMS-C3nJD7U9mjg,4500480
+bigdl/cpp/libs/lookahead.exe,sha256=IzAdr4DHj4KBcdbPTdOVfXc1o_H_Y0F5xq7yU0SyAx8,668672
+bigdl/cpp/libs/lookup.exe,sha256=Ql0oes_th424iswCPsY2RxoQAzEAMEyk7h9F91SYJEM,702976
+bigdl/cpp/libs/ls-sycl-device.exe,sha256=bffY6meqsUl9xEdYGBgCItza7cMlBIuonxQDI98jp4I,10240
+bigdl/cpp/libs/main.exe,sha256=jIjAWLN6Ohjcbxfz2qrCgaSAWrGw8yPfGTBeNdkIwKw,758784
+bigdl/cpp/libs/ollama.exe,sha256=GVcI_iTxoK9-hPIzmR2cwXzvVube_zOj6VE9Xk1587c,63376170
+bigdl/cpp/libs/parallel.exe,sha256=RxHF9GmNGLll6kQJwDYOnp_dZJTjV5kq2nJwE47WkuY,686080
+bigdl/cpp/libs/passkey.exe,sha256=732tT1NsGmyJf7IrUcAlN4Z6G7pleRvqW0lb4gPMhQc,214528
+bigdl/cpp/libs/perplexity.exe,sha256=c26C5sbtzdOM_ge_f7gnjh_-oS8RKn2wAe2ve7vZ9Zg,774656
+bigdl/cpp/libs/q8dot.exe,sha256=_iNuI2TBaAiSdmy_Cf4mafFKtcqB0IXm_jX51hYNtm0,53760
+bigdl/cpp/libs/quantize-stats.exe,sha256=u907hbMNNh__8ZauYTQQsGe02pBtA_aMcAjnnAjF3vs,130560
+bigdl/cpp/libs/quantize.exe,sha256=5wzH8RUsK6Xbo-hyf3OUDBGY7hyDBxfAEwAvFKm7zbw,233984
+bigdl/cpp/libs/save-load-state.exe,sha256=-HyCr9BIszkJ4RtKXis-Actz4hWXvbEmu3359SYQPbE,621056
+bigdl/cpp/libs/server.exe,sha256=dXd4HDqHmPalppQ8hHvqpgsYcD8TSAREaPg5Uq6jcws,1682432
+bigdl/cpp/libs/simple.exe,sha256=jvCmqXL1k9K_U34cB34I2nTRI4lLNUASzx_Pdv6HTts,200704
+bigdl/cpp/libs/speculative.exe,sha256=8Lf3UI9d5bQQH-f790ZGGIVkJf77TTARixQNylTu83E,694784
+bigdl/cpp/libs/tokenize.exe,sha256=mj1Sy-6BkKfzK3lZvhu96_SRzHh8UjL2tP5ydi1gSQA,180224
+bigdl/cpp/libs/train-text-from-scratch.exe,sha256=6ok4o-F74u93BHR87Lo98P5KqaNuLtHOpnr7qMtFGCM,281600
+bigdl/cpp/libs/vdot.exe,sha256=8jsWdkMSNw6njNbAPODXjpjGeOC7nRyvD6ENOc_rpiY,56320
+bigdl_core_cpp-2.5.0b20240510.data/scripts/init-llama-cpp.bat,sha256=13AgSYRyzk6hlSz11dPyCMJeSoW9z8PZ9l85E_5GWxs,528
+bigdl_core_cpp-2.5.0b20240510.data/scripts/init-llama-cpp.ps1,sha256=JFOylLxO4MKpllHhdbPuJ1xHi9azxDpzdJns8JtZpkU,501
+bigdl_core_cpp-2.5.0b20240510.data/scripts/init-ollama.bat,sha256=OVX62Q7KH67z6rQrgQUwEc19E3eiAH-8HAIycDmySYc,350
+bigdl_core_cpp-2.5.0b20240510.dist-info/METADATA,sha256=y1KrEZsEyFlrEyUUkBNI_nMrCpSe4aR827YBROTEc4I,668
+bigdl_core_cpp-2.5.0b20240510.dist-info/WHEEL,sha256=at4xwl6JdXdkZHxdo5ixTwJ7ENtVftSy2wqmsdmo_4U,98
+bigdl_core_cpp-2.5.0b20240510.dist-info/top_level.txt,sha256=iGuLfZARD_qANcIMfy0tbbrC3EtCg6BSiH8icc3dLWs,6
+bigdl_core_cpp-2.5.0b20240510.dist-info/RECORD,,