bigdl-core-cpp 2.5.0b20240507__py3-none-win_amd64.whl → 2.5.0b20240509__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. bigdl/cpp/convert-hf-to-gguf.py +3177 -0
  2. bigdl/cpp/convert.py +36 -24
  3. bigdl/cpp/gguf-py/gguf/constants.py +19 -3
  4. bigdl/cpp/gguf-py/gguf/gguf_reader.py +16 -3
  5. bigdl/cpp/gguf-py/gguf/gguf_writer.py +12 -1
  6. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +2 -0
  7. bigdl/cpp/gguf-py/gguf/vocab.py +13 -29
  8. bigdl/cpp/libs/baby-llama.exe +0 -0
  9. bigdl/cpp/libs/batched-bench.exe +0 -0
  10. bigdl/cpp/libs/batched.exe +0 -0
  11. bigdl/cpp/libs/beam-search.exe +0 -0
  12. bigdl/cpp/libs/benchmark.exe +0 -0
  13. bigdl/cpp/libs/convert-llama2c-to-ggml.exe +0 -0
  14. bigdl/cpp/libs/embedding.exe +0 -0
  15. bigdl/cpp/libs/export-lora.exe +0 -0
  16. bigdl/cpp/libs/finetune.exe +0 -0
  17. bigdl/cpp/libs/ggml_shared.dll +0 -0
  18. bigdl/cpp/libs/gguf.exe +0 -0
  19. bigdl/cpp/libs/gritlm.exe +0 -0
  20. bigdl/cpp/libs/imatrix.exe +0 -0
  21. bigdl/cpp/libs/infill.exe +0 -0
  22. bigdl/cpp/libs/llama-bench.exe +0 -0
  23. bigdl/cpp/libs/llama.dll +0 -0
  24. bigdl/cpp/libs/llava-cli.exe +0 -0
  25. bigdl/cpp/libs/llava_shared.dll +0 -0
  26. bigdl/cpp/libs/lookahead.exe +0 -0
  27. bigdl/cpp/libs/lookup.exe +0 -0
  28. bigdl/cpp/libs/ls-sycl-device.exe +0 -0
  29. bigdl/cpp/libs/main.exe +0 -0
  30. bigdl/cpp/libs/ollama.exe +0 -0
  31. bigdl/cpp/libs/parallel.exe +0 -0
  32. bigdl/cpp/libs/passkey.exe +0 -0
  33. bigdl/cpp/libs/perplexity.exe +0 -0
  34. bigdl/cpp/libs/q8dot.exe +0 -0
  35. bigdl/cpp/libs/quantize-stats.exe +0 -0
  36. bigdl/cpp/libs/quantize.exe +0 -0
  37. bigdl/cpp/libs/save-load-state.exe +0 -0
  38. bigdl/cpp/libs/server.exe +0 -0
  39. bigdl/cpp/libs/simple.exe +0 -0
  40. bigdl/cpp/libs/speculative.exe +0 -0
  41. bigdl/cpp/libs/tokenize.exe +0 -0
  42. bigdl/cpp/libs/train-text-from-scratch.exe +0 -0
  43. bigdl/cpp/libs/vdot.exe +0 -0
  44. {bigdl_core_cpp-2.5.0b20240507.data → bigdl_core_cpp-2.5.0b20240509.data}/scripts/init-llama-cpp.bat +1 -0
  45. {bigdl_core_cpp-2.5.0b20240507.dist-info → bigdl_core_cpp-2.5.0b20240509.dist-info}/METADATA +1 -1
  46. bigdl_core_cpp-2.5.0b20240509.dist-info/RECORD +55 -0
  47. bigdl_core_cpp-2.5.0b20240507.dist-info/RECORD +0 -54
  48. {bigdl_core_cpp-2.5.0b20240507.data → bigdl_core_cpp-2.5.0b20240509.data}/scripts/init-llama-cpp.ps1 +0 -0
  49. {bigdl_core_cpp-2.5.0b20240507.data → bigdl_core_cpp-2.5.0b20240509.data}/scripts/init-ollama.bat +0 -0
  50. {bigdl_core_cpp-2.5.0b20240507.dist-info → bigdl_core_cpp-2.5.0b20240509.dist-info}/WHEEL +0 -0
  51. {bigdl_core_cpp-2.5.0b20240507.dist-info → bigdl_core_cpp-2.5.0b20240509.dist-info}/top_level.txt +0 -0
bigdl/cpp/convert.py CHANGED
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 from __future__ import annotations
 
+import logging
 import argparse
 import concurrent.futures
 import enum
@@ -35,6 +36,8 @@ import gguf
 if TYPE_CHECKING:
     from typing_extensions import Self, TypeAlias
 
+logger = logging.getLogger("convert")
+
 if hasattr(faulthandler, 'register') and hasattr(signal, 'SIGUSR1'):
     faulthandler.register(signal.SIGUSR1)
 
@@ -643,7 +646,6 @@ class LlamaHfVocab(Vocab):
 
 
 def permute(weights: NDArray, n_head: int, n_head_kv: int) -> NDArray:
-    # print( "permute debug " + str(weights.shape[0]) + " x " + str(weights.shape[1]) + " nhead " + str(n_head) + " nheadkv " + str(n_kv_head) )
     if n_head_kv is not None and n_head != n_head_kv:
         n_head = n_head_kv
     return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
@@ -1033,12 +1035,12 @@ def check_vocab_size(params: Params, vocab: BaseVocab, pad_vocab: bool = False)
 
     # Check for a vocab size mismatch
     if params.n_vocab == vocab.vocab_size:
-        print("Ignoring added_tokens.json since model matches vocab size without it.")
+        logger.warning("Ignoring added_tokens.json since model matches vocab size without it.")
        return
 
     if pad_vocab and params.n_vocab > vocab.vocab_size:
         pad_count = params.n_vocab - vocab.vocab_size
-        print(
+        logger.debug(
             f"Padding vocab with {pad_count} token(s) - <dummy00001> through <dummy{pad_count:05}>"
         )
         for i in range(1, pad_count + 1):
@@ -1166,7 +1168,7 @@ class OutputFile:
             elapsed = time.time() - start
             size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
             padi = len(str(len(model)))
-            print(
+            logger.info(
                 f"[{i + 1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}"
             )
             self.gguf.write_tensor_data(ndarray)
@@ -1281,12 +1283,12 @@ def convert_model_names(model: LazyModel, params: Params, skip_unknown: bool) ->
    # HF models permut or pack some of the tensors, so we need to undo that
    for i in itertools.count():
        if f"model.layers.{i}.self_attn.q_proj.weight" in model:
-            print(f"Permuting layer {i}")
+            logger.debug(f"Permuting layer {i}")
            tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head, params.n_head)
            tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head, params.n_head_kv)
            # tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
        elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
-            print(f"Unpacking and permuting layer {i}")
+            logger.debug(f"Unpacking and permuting layer {i}")
            tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head, params.n_head)
            tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head, params.n_head_kv)
            tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = part_lazy (model[f"model.layers.{i}.self_attn.W_pack.weight"], 2)
@@ -1299,15 +1301,15 @@ def convert_model_names(model: LazyModel, params: Params, skip_unknown: bool) ->
        tensor_type, name_new = tmap.get_type_and_name(name, try_suffixes = (".weight", ".bias")) or (None, None)
        if name_new is None:
            if skip_unknown:
-                print(f"Unexpected tensor name: {name} - skipping")
+                logger.warning(f"Unexpected tensor name: {name} - skipping")
                continue
            raise ValueError(f"Unexpected tensor name: {name}. Use --skip-unknown to ignore it (e.g. LLaVA)")
 
        if tensor_type in should_skip:
-            print(f"skipping tensor {name_new}")
+            logger.debug(f"skipping tensor {name_new}")
            continue
 
-        print(f"{name:48s} -> {name_new:40s} | {lazy_tensor.data_type.name:6s} | {lazy_tensor.shape}")
+        logger.debug(f"{name:48s} -> {name_new:40s} | {lazy_tensor.data_type.name:6s} | {lazy_tensor.shape}")
        out[name_new] = lazy_tensor
 
    return out
@@ -1372,7 +1374,7 @@ def load_some_model(path: Path) -> ModelPlus:
    paths = find_multifile_paths(path)
    models_plus: list[ModelPlus] = []
    for path in paths:
-        print(f"Loading model file {path}")
+        logger.info(f"Loading model file {path}")
        models_plus.append(lazy_load_file(path))
 
    model_plus = merge_multifile_models(models_plus)
@@ -1413,7 +1415,7 @@ class VocabFactory:
        else:
            raise FileNotFoundError(f"Could not find a tokenizer matching any of {vocab_types}")
 
-        print(f"Loaded vocab file {vocab.fname_tokenizer!r}, type {vocab.name!r}")
+        logger.info(f"Loaded vocab file {vocab.fname_tokenizer!r}, type {vocab.name!r}")
        return vocab
 
    def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) -> tuple[BaseVocab, gguf.SpecialVocab]:
@@ -1438,19 +1440,19 @@ def default_outfile(model_paths: list[Path], file_type: GGMLFileType) -> Path:
    }[file_type]
    ret = model_paths[0].parent / f"ggml-model-{namestr}.gguf"
    if ret in model_paths:
-        sys.stderr.write(
+        logger.error(
            f"Error: Default output path ({ret}) would overwrite the input. "
-            "Please explicitly specify a path using --outfile.\n")
+            "Please explicitly specify a path using --outfile.")
        sys.exit(1)
    return ret
 
 
 def do_dump_model(model_plus: ModelPlus) -> None:
-    print(f"model_plus.paths = {model_plus.paths!r}")
-    print(f"model_plus.format = {model_plus.format!r}")
-    print(f"model_plus.vocab = {model_plus.vocab!r}")
+    print(f"model_plus.paths = {model_plus.paths!r}") # noqa: NP100
+    print(f"model_plus.format = {model_plus.format!r}") # noqa: NP100
+    print(f"model_plus.vocab = {model_plus.vocab!r}") # noqa: NP100
    for name, lazy_tensor in model_plus.model.items():
-        print(f"{name}: shape={lazy_tensor.shape} type={lazy_tensor.data_type}; {lazy_tensor.description}")
+        print(f"{name}: shape={lazy_tensor.shape} type={lazy_tensor.data_type}; {lazy_tensor.description}") # noqa: NP100
 
 
 def main(args_in: list[str] | None = None) -> None:
@@ -1473,8 +1475,18 @@ def main(args_in: list[str] | None = None) -> None:
    parser.add_argument("--big-endian", action="store_true", help="model is executed on big endian machine")
    parser.add_argument("--pad-vocab", action="store_true", help="add pad tokens when model vocab expects more than tokenizer metadata provides")
    parser.add_argument("--skip-unknown", action="store_true", help="skip unknown tensor names instead of failing")
+    parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
 
    args = parser.parse_args(args_in)
+
+    if args.verbose:
+        logging.basicConfig(level=logging.DEBUG)
+    elif args.dump_single or args.dump:
+        # Avoid printing anything besides the dump output
+        logging.basicConfig(level=logging.WARNING)
+    else:
+        logging.basicConfig(level=logging.INFO)
+
    if args.no_vocab and args.vocab_only:
        raise ValueError("--vocab-only does not make sense with --no-vocab")
 
@@ -1491,6 +1503,7 @@ def main(args_in: list[str] | None = None) -> None:
    if args.dump:
        do_dump_model(model_plus)
        return
+
    endianess = gguf.GGUFEndian.LITTLE
    if args.big_endian:
        endianess = gguf.GGUFEndian.BIG
@@ -1513,7 +1526,7 @@ def main(args_in: list[str] | None = None) -> None:
        "q8_0": GGMLFileType.MostlyQ8_0,
    }[args.outtype]
 
-    print(f"params = {params}")
+    logger.info(f"params = {params}")
 
    model_parent_path = model_plus.paths[0].parent
    vocab_path = Path(args.vocab_dir or args.model or model_parent_path)
@@ -1528,15 +1541,14 @@ def main(args_in: list[str] | None = None) -> None:
        outfile = args.outfile
        OutputFile.write_vocab_only(outfile, params, vocab, special_vocab,
                                    endianess=endianess, pad_vocab=args.pad_vocab)
-        print(f"Wrote {outfile}")
+        logger.info(f"Wrote {outfile}")
        return
 
    if model_plus.vocab is not None and args.vocab_dir is None and not args.no_vocab:
        vocab = model_plus.vocab
 
-    print(f"Vocab info: {vocab}")
-    print(f"Special vocab info: {special_vocab}")
-
+    logger.info(f"Vocab info: {vocab}")
+    logger.info(f"Special vocab info: {special_vocab}")
    model = model_plus.model
    model = convert_model_names(model, params, args.skip_unknown)
    ftype = pick_output_type(model, args.outtype)
@@ -1544,11 +1556,11 @@ def main(args_in: list[str] | None = None) -> None:
    outfile = args.outfile or default_outfile(model_plus.paths, ftype)
 
    params.ftype = ftype
-    print(f"Writing {outfile}, format {ftype}")
+    logger.info(f"Writing {outfile}, format {ftype}")
 
    OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab,
                         concurrency=args.concurrency, endianess=endianess, pad_vocab=args.pad_vocab)
-    print(f"Wrote {outfile}")
+    logger.info(f"Wrote {outfile}")
 
 
 if __name__ == '__main__':
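
The net effect of the convert.py changes above: all progress output now flows through the standard logging module, and the new --verbose flag selects the level. A minimal sketch of the resulting behavior (the messages and model path below are illustrative, not from the script):

import logging

# Same pattern as the patched convert.py: one module-level logger,
# configured once in main() based on the CLI flags.
logger = logging.getLogger("convert")

def main(verbose: bool = False, dumping: bool = False) -> None:
    if verbose:
        logging.basicConfig(level=logging.DEBUG)    # --verbose
    elif dumping:
        logging.basicConfig(level=logging.WARNING)  # --dump / --dump-single stay quiet
    else:
        logging.basicConfig(level=logging.INFO)     # default
    logger.info("Loading model file model.safetensors")  # was: print(...)
    logger.debug("Permuting layer 0")                    # only visible with --verbose

if __name__ == "__main__":
    main(verbose=True)

Note that do_dump_model() deliberately keeps print() (with # noqa: NP100 markers), since its dump output is the point of --dump and must not be filtered by the log level.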
bigdl/cpp/gguf-py/gguf/constants.py CHANGED
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import sys
 from enum import Enum, IntEnum, auto
 from typing import Any
 
@@ -72,6 +71,7 @@ class Keys:
 
    class Tokenizer:
        MODEL = "tokenizer.ggml.model"
+        PRE = "tokenizer.ggml.pre"
        LIST = "tokenizer.ggml.tokens"
        TOKEN_TYPE = "tokenizer.ggml.token_type"
        TOKEN_TYPE_COUNT = "tokenizer.ggml.token_type_count"  # for BERT-style token types
@@ -124,6 +124,7 @@ class MODEL_ARCH(IntEnum):
    QWEN2 = auto()
    QWEN2MOE = auto()
    PHI2 = auto()
+    PHI3 = auto()
    PLAMO = auto()
    CODESHELL = auto()
    ORION = auto()
@@ -200,6 +201,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
    MODEL_ARCH.QWEN2: "qwen2",
    MODEL_ARCH.QWEN2MOE: "qwen2moe",
    MODEL_ARCH.PHI2: "phi2",
+    MODEL_ARCH.PHI3: "phi3",
    MODEL_ARCH.PLAMO: "plamo",
    MODEL_ARCH.CODESHELL: "codeshell",
    MODEL_ARCH.ORION: "orion",
@@ -550,6 +552,20 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.FFN_UP,
    ],
+    MODEL_ARCH.PHI3: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
    MODEL_ARCH.CODESHELL: [
        MODEL_TENSOR.TOKEN_EMBD,
        MODEL_TENSOR.POS_EMBD,
@@ -837,8 +853,7 @@ class GGUFValueType(IntEnum):
            return GGUFValueType.INT32
        # TODO: need help with 64-bit types in Python
        else:
-            print("Unknown type:", type(val))
-            sys.exit()
+            raise ValueError(f"Unknown type: {type(val)}")
 
 
 # Note: Does not support GGML_QKK_64
@@ -924,6 +939,7 @@ KEY_SSM_TIME_STEP_RANK = Keys.SSM.TIME_STEP_RANK
 
 # tokenization
 KEY_TOKENIZER_MODEL = Keys.Tokenizer.MODEL
+KEY_TOKENIZER_PRE = Keys.Tokenizer.PRE
 KEY_TOKENIZER_LIST = Keys.Tokenizer.LIST
 KEY_TOKENIZER_TOKEN_TYPE = Keys.Tokenizer.TOKEN_TYPE
 KEY_TOKENIZER_SCORES = Keys.Tokenizer.SCORES
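
Taken together, the constants.py changes register the phi3 architecture and make unsupported key types raise instead of killing the process. A small sketch of how downstream code can rely on both, assuming the bundled gguf package is on the import path:

from gguf.constants import MODEL_ARCH, MODEL_ARCH_NAMES, MODEL_TENSORS, GGUFValueType

# The new phi3 architecture is enumerable like any other
print(MODEL_ARCH_NAMES[MODEL_ARCH.PHI3])    # -> "phi3"
print(len(MODEL_TENSORS[MODEL_ARCH.PHI3]))  # -> 12 tensor kinds

# Unknown value types now raise ValueError instead of calling sys.exit()
try:
    GGUFValueType.get_type(object())  # not a str/bytes/list/float/bool/int
except ValueError as e:
    print(f"caught: {e}")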
bigdl/cpp/gguf-py/gguf/gguf_reader.py CHANGED
@@ -4,6 +4,7 @@
 #
 from __future__ import annotations
 
+import logging
 import os
 from collections import OrderedDict
 from typing import Any, Literal, NamedTuple, TypeVar, Union
@@ -27,6 +28,7 @@ from gguf.constants import (
    GGUFValueType,
 )
 
+logger = logging.getLogger(__name__)
 
 READER_SUPPORTED_VERSIONS = [2, GGUF_VERSION]
 
@@ -139,8 +141,13 @@ class GGUFReader:
 
    def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
        if field.name in self.fields:
-            raise KeyError(f'Duplicate {field.name} already in list at offset {field.offset}')
-        self.fields[field.name] = field
+            # TODO: add option to generate error on duplicate keys
+            # raise KeyError(f'Duplicate {field.name} already in list at offset {field.offset}')
+
+            logger.warning(f'Duplicate key {field.name} at offset {field.offset}')
+            self.fields[field.name + '_{}'.format(field.offset)] = field
+        else:
+            self.fields[field.name] = field
        return 0 if skip_sum else sum(int(part.nbytes) for part in field.parts)
 
    def _get_str(self, offset: int) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
@@ -234,8 +241,14 @@ class GGUFReader:
 
    def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
        tensors = []
+        tensor_names = set() # keep track of name to prevent duplicated tensors
        for field in fields:
            _name_len, name_data, _n_dims, dims, raw_dtype, offset_tensor = field.parts
+            # check if there's any tensor having same name already in the list
+            tensor_name = str(bytes(name_data), encoding = 'utf-8')
+            if tensor_name in tensor_names:
+                raise ValueError(f'Found duplicated tensor with name {tensor_name}')
+            tensor_names.add(tensor_name)
            ggml_type = GGMLQuantizationType(raw_dtype[0])
            n_elems = np.prod(dims)
            block_size, type_size = GGML_QUANT_SIZES[ggml_type]
@@ -267,7 +280,7 @@ class GGUFReader:
                item_count = n_bytes
                item_type = np.uint8
            tensors.append(ReaderTensor(
-                name = str(bytes(name_data), encoding = 'utf-8'),
+                name = tensor_name,
                tensor_type = ggml_type,
                shape = dims,
                n_elements = n_elems,
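
With these reader changes, a duplicated metadata key is renamed to '<key>_<offset>' and logged instead of aborting the load, while a duplicated tensor name still fails fast. A sketch of reading a file under these rules (the file name is hypothetical):

import logging
from gguf.gguf_reader import GGUFReader

logging.basicConfig(level=logging.WARNING)  # surface the new duplicate-key warnings

reader = GGUFReader("model.gguf")  # hypothetical file
for key in reader.fields:
    # A duplicated key would appear here as e.g. "tokenizer.ggml.tokens_1234"
    print(key)
# A file containing two tensors with the same name raises ValueError during load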
bigdl/cpp/gguf-py/gguf/gguf_writer.py CHANGED
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import logging
 import os
 import shutil
 import struct
@@ -24,6 +25,8 @@ from .constants import (
    TokenType,
 )
 
+logger = logging.getLogger(__name__)
+
 
 class WriterState(Enum):
    EMPTY = auto()
@@ -63,10 +66,11 @@ class GGUFWriter:
        self.kv_data_count = 0
        self.ti_data = bytearray()
        self.ti_data_count = 0
+        self.ti_names = set()
        self.use_temp_file = use_temp_file
        self.temp_file = None
        self.tensors = []
-        print("gguf: This GGUF file is for {0} Endian only".format(
+        logger.info("gguf: This GGUF file is for {0} Endian only".format(
            "Big" if self.endianess == GGUFEndian.BIG else "Little",
        ))
        self.state = WriterState.EMPTY
@@ -197,6 +201,10 @@ class GGUFWriter:
        if self.state is not WriterState.EMPTY:
            raise ValueError(f'Expected output file to be empty, got {self.state}')
 
+        if name in self.ti_names:
+            raise ValueError(f'Duplicated tensor name {name}')
+        self.ti_names.add(name)
+
        encoded_name = name.encode("utf8")
        self.ti_data += self._pack("Q", len(encoded_name))
        self.ti_data += encoded_name
@@ -422,6 +430,9 @@ class GGUFWriter:
    def add_tokenizer_model(self, model: str) -> None:
        self.add_string(Keys.Tokenizer.MODEL, model)
 
+    def add_tokenizer_pre(self, pre: str) -> None:
+        self.add_string(Keys.Tokenizer.PRE, pre)
+
    def add_token_list(self, tokens: Sequence[str] | Sequence[bytes] | Sequence[bytearray]) -> None:
        self.add_array(Keys.Tokenizer.LIST, tokens)
bigdl/cpp/gguf-py/gguf/tensor_mapping.py CHANGED
@@ -117,6 +117,7 @@ class TensorNameMap:
            "h.{bid}.attn.c_attn", # gpt2
            "transformer.h.{bid}.mixer.Wqkv", # phi2
            "encoder.layers.{bid}.attn.Wqkv", # nomic-bert
+            "model.layers.{bid}.self_attn.qkv_proj" # phi3
        ),
 
        # Attention query
@@ -234,6 +235,7 @@ class TensorNameMap:
            "h.{bid}.mlp.c_fc", # gpt2
            "transformer.h.{bid}.mlp.fc1", # phi2
            "model.layers.{bid}.mlp.fc1", # phi2
+            "model.layers.{bid}.mlp.gate_up_proj", # phi3
            "model.layers.layers.{bid}.mlp.up_proj", # plamo
            "model.layers.{bid}.feed_forward.w3", # internlm2
            "encoder.layers.{bid}.mlp.fc11", # nomic-bert
bigdl/cpp/gguf-py/gguf/vocab.py CHANGED
@@ -1,13 +1,15 @@
 from __future__ import annotations
 
+import logging
 import json
 import os
-import sys
 from pathlib import Path
 from typing import Any, Callable
 
 from .gguf_writer import GGUFWriter
 
+logger = logging.getLogger(__name__)
+
 
 class SpecialVocab:
    merges: list[str]
@@ -40,38 +42,29 @@ class SpecialVocab:
    def add_to_gguf(self, gw: GGUFWriter, quiet: bool = False) -> None:
        if self.merges:
            if not quiet:
-                print(f'gguf: Adding {len(self.merges)} merge(s).')
+                logger.info(f'Adding {len(self.merges)} merge(s).')
            gw.add_token_merges(self.merges)
        elif self.load_merges:
-            print(
-                'gguf: WARNING: Adding merges requested but no merges found, output may be non-functional.',
-                file = sys.stderr,
-            )
+            logger.warning('Adding merges requested but no merges found, output may be non-functional.')
        for typ, tokid in self.special_token_ids.items():
            id_handler: Callable[[int], None] | None = getattr(gw, f'add_{typ}_token_id', None)
            if id_handler is None:
-                print(
-                    f'gguf: WARNING: No handler for special token type {typ} with id {tokid} - skipping',
-                    file = sys.stderr,
-                )
+                logger.warning(f'No handler for special token type {typ} with id {tokid} - skipping')
                continue
            if not quiet:
-                print(f'gguf: Setting special token type {typ} to {tokid}')
+                logger.info(f'Setting special token type {typ} to {tokid}')
            id_handler(tokid)
        for typ, value in self.add_special_token.items():
            add_handler: Callable[[bool], None] | None = getattr(gw, f'add_add_{typ}_token', None)
            if add_handler is None:
-                print(
-                    f'gguf: WARNING: No handler for add_{typ}_token with value {value} - skipping',
-                    file = sys.stderr,
-                )
+                logger.warning(f'No handler for add_{typ}_token with value {value} - skipping')
                continue
            if not quiet:
-                print(f'gguf: Setting add_{typ}_token to {value}')
+                logger.info(f'Setting add_{typ}_token to {value}')
            add_handler(value)
        if self.chat_template is not None:
            if not quiet:
-                print(f'gguf: Setting chat_template to {self.chat_template}')
+                logger.info(f'Setting chat_template to {self.chat_template}')
            gw.add_chat_template(self.chat_template)
 
    def _load(self, path: Path) -> None:
@@ -99,10 +92,7 @@ class SpecialVocab:
                continue
            parts = line.split(None, 3)
            if len(parts) != 2:
-                print(
-                    f'gguf: WARNING: {merges_file.name}: Line {line_num}: Entry malformed, ignoring',
-                    file = sys.stderr,
-                )
+                logger.warning(f'{merges_file.name}: Line {line_num}: Entry malformed, ignoring')
                continue
            merges.append(f'{parts[0]} {parts[1]}')
        self.merges = merges
@@ -118,10 +108,7 @@ class SpecialVocab:
                return
            self.special_token_ids[typ] = tid
            return
-        print(
-            f'gguf: WARNING: Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping',
-            file = sys.stderr,
-        )
+        logger.warning(f'Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping')
 
    def _try_load_from_tokenizer_json(self, path: Path) -> bool:
        tokenizer_file = path / 'tokenizer.json'
@@ -144,10 +131,7 @@ class SpecialVocab:
        if chat_template is None or isinstance(chat_template, (str, list)):
            self.chat_template = chat_template
        else:
-            print(
-                f'gguf: WARNING: Bad type for chat_template field in {tokenizer_config_file!r} - ignoring',
-                file = sys.stderr
-            )
+            logger.warning(f'Bad type for chat_template field in {tokenizer_config_file!r} - ignoring')
        for typ in self.special_token_types:
            add_entry = tokenizer_config.get(f'add_{typ}_token')
            if isinstance(add_entry, bool):
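
Since SpecialVocab now reports through logging rather than writing to stderr directly, callers only see its warnings once logging is configured. A brief sketch under the assumption that the model directory exists and the gguf package is importable:

import logging
from pathlib import Path
from gguf.vocab import SpecialVocab

logging.basicConfig(level=logging.INFO)

sv = SpecialVocab(Path("my-model-dir"), load_merges=True)  # hypothetical directory
# Messages such as "Adding merges requested but no merges found, output may be
# non-functional." now go through logger.warning() instead of print(..., file=sys.stderr),
# so a caller can silence or redirect them with standard logging configuration.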
bigdl/cpp/libs (binary files) CHANGED
All 36 executables and DLLs under bigdl/cpp/libs (items 8-43 in the list above) were rebuilt in this release; binary files have no textual diff.
{bigdl_core_cpp-2.5.0b20240507.data → bigdl_core_cpp-2.5.0b20240509.data}/scripts/init-llama-cpp.bat CHANGED
@@ -15,4 +15,5 @@ for %%f in (*) do (
 popd
 
 copy "%cpp_dir%\convert.py" .
+copy "%cpp_dir%\convert-hf-to-gguf.py" .
 xcopy /E /I "%cpp_dir%\gguf-py\" .\gguf-py
{bigdl_core_cpp-2.5.0b20240507.dist-info → bigdl_core_cpp-2.5.0b20240509.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: bigdl-core-cpp
-Version: 2.5.0b20240507
+Version: 2.5.0b20240509
 Summary: Large Language Model Develop Toolkit
 Author: BigDL Authors
 License: Apache License, Version 2.0
bigdl_core_cpp-2.5.0b20240509.dist-info/RECORD ADDED
@@ -0,0 +1,55 @@
+bigdl/cpp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bigdl/cpp/convert-hf-to-gguf.py,sha256=67U2K3ix3i1lps7L57-qiTVu5-VvJQbAXcan4aU0KT4,146672
+bigdl/cpp/convert.py,sha256=OPDsmbVairx1s6ftM-G8MhUpPpEwvF48yS8HhOB7_j4,65023
+bigdl/cpp/gguf-py/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bigdl/cpp/gguf-py/gguf/__init__.py,sha256=2eyRnO5rS0WNuzL_N159IRA66ybYkisoSETMSM-tGBQ,135
+bigdl/cpp/gguf-py/gguf/constants.py,sha256=ovAbNrRVlAchqxDpHU3Cey24Kcq6pEmFTmOu_ZZ71ng,32165
+bigdl/cpp/gguf-py/gguf/gguf.py,sha256=QpLc-xU055W2d7CEFvJp2gLIfGO63bdM24ZndZCH6rw,493
+bigdl/cpp/gguf-py/gguf/gguf_reader.py,sha256=e-yzOSzsKjOfaCzXQ8QsJjBIypdqFdI5NO9tCS3VUZk,12081
+bigdl/cpp/gguf-py/gguf/gguf_writer.py,sha256=TM3UeO9d7Y3_uB3GikB4iqZt0uJw7OydijnUEoiFYAA,20496
+bigdl/cpp/gguf-py/gguf/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+bigdl/cpp/gguf-py/gguf/tensor_mapping.py,sha256=CxeOv5ncOICvskNibM1AFs24QUI6N3n_trU0Dvy8wu8,22290
+bigdl/cpp/gguf-py/gguf/vocab.py,sha256=YiWxOW4ta0R3OI2CWeF3o7RIvJA6O0jQ9fxHnartxvU,6955
+bigdl/cpp/libs/baby-llama.exe,sha256=TYHwbN5ItupRTTDrEIMOWusL7Icx2h5OE5zqy3o-JTU,198144
+bigdl/cpp/libs/batched-bench.exe,sha256=xgNV70OpxqTECxtu5g-qrmQVpU6O9yemc2ooJXmiSmc,204800
+bigdl/cpp/libs/batched.exe,sha256=Dd1xOTwFMxqkc6iIX6fUq4B14LlC6TedJweLHJNv7jc,207360
+bigdl/cpp/libs/beam-search.exe,sha256=cOLg89-ljHrM9m3ihbyoHm_UrLLWilOunARWqEmhuws,212480
+bigdl/cpp/libs/benchmark.exe,sha256=xngu4Y79hy7xLyHCi5OBRww9MNba1eKXp8Iu2rguB9I,74240
+bigdl/cpp/libs/convert-llama2c-to-ggml.exe,sha256=BxWBBn9FGWMjmatL10VpKEUqk-N0X6vycOjhf0h1De4,126464
+bigdl/cpp/libs/embedding.exe,sha256=X8ZFVeMTyPzQRWlgMvhNarTkuw2kChpMqzZUJJd-b_4,628224
+bigdl/cpp/libs/export-lora.exe,sha256=OmaG5_dKlEwTwSIMWbk5vqIHXHREgx9wrl4bkQB6fkU,90624
+bigdl/cpp/libs/finetune.exe,sha256=I-Jzm47hAgh-4wj4odl-racdisoMHdmDfZGRFvmcrbE,301056
+bigdl/cpp/libs/ggml_shared.dll,sha256=AHhpeWs5Ujj9CQym3T3A2NGZTzlK33oMbiooYXSb4Uk,4128768
+bigdl/cpp/libs/gguf.exe,sha256=Yn3Sj3owMpc7tIFBnHfEOFsMKqdaqBG6rQodzoh7xws,3666432
+bigdl/cpp/libs/gritlm.exe,sha256=aaIMRhCwxWjU_TUnGGOj-CjdX1mYj-uphmS8VgBiXdc,623104
+bigdl/cpp/libs/imatrix.exe,sha256=2lio_YuKkKk4JRerfFnyw9J4oF2Dj1sfCxRdoSZrtCY,660480
+bigdl/cpp/libs/infill.exe,sha256=9d94vaePAPDo30qNOEtjOAN-wIYgjoNzQS5lMwV608s,727040
+bigdl/cpp/libs/llama-bench.exe,sha256=IU1R9-Rg8DsgeqwcNrMlxpy6S1YRvCK-sB8O8kIL_bM,330752
+bigdl/cpp/libs/llama.dll,sha256=x1wfQ--R13ItNTzDEPQvT4XfAWGHYtubdwg1Vsi-tnE,5621760
+bigdl/cpp/libs/llava-cli.exe,sha256=gqn44wcUJovJLGPY9h-YA9GdBt8oPgxO6Z9f2zGmTuk,909312
+bigdl/cpp/libs/llava_shared.dll,sha256=nsvJagmGiFSBLM9jYbgz18lx5yywMHQ8GwVzkHt4v2E,4397568
+bigdl/cpp/libs/lookahead.exe,sha256=7LAxNgNPJpgwRn0nznb9rS6zj9pvNxjeXAcrn97kF_k,668672
+bigdl/cpp/libs/lookup.exe,sha256=LwaoWrSGy72IX643LdVejPL2KOs22xN73UemSZE5p2M,702976
+bigdl/cpp/libs/ls-sycl-device.exe,sha256=HLfkWKuQdoHwyptDqBPKnvMgN8nOSkltmIl9BAUT39w,10240
+bigdl/cpp/libs/main.exe,sha256=pAPcorruM0JlqiaPZu0QrvlrCMTuJlAFxs-6qLyl2Hs,758784
+bigdl/cpp/libs/ollama.exe,sha256=np5moHCnQ1CS6llHIRQ34rKG6Rgb2ELbHeoVMhZR1s4,63375573
+bigdl/cpp/libs/parallel.exe,sha256=-vkdyvjs7V-wa6JJIb2J0TYu5Un998Ym7oBWF6WN8E0,686080
+bigdl/cpp/libs/passkey.exe,sha256=nj__p65IvTwjz5NXuOefBqbLLe2pDuJEs89oZv7azHk,214528
+bigdl/cpp/libs/perplexity.exe,sha256=KU4W2PZyRkZbAOqxf4HoO0yqzo9FiCVoECroumKau0c,774656
+bigdl/cpp/libs/q8dot.exe,sha256=fLTgWEY2l7hvdChCwGIAJ4V_PYSiAs06Wfq3j0nidEQ,53760
+bigdl/cpp/libs/quantize-stats.exe,sha256=rEXDH1ZGI4DKZGWeZqr_YhBODpPwPZUpVQG9NejHcKA,130560
+bigdl/cpp/libs/quantize.exe,sha256=xIep7s309ZqV5in6UytI3pyeE11fDq4PJoeo2BHkdB0,233984
+bigdl/cpp/libs/save-load-state.exe,sha256=dPDBy-390FfjJdRNyT3wDCVO-AOQR-cc-qQ3lRWilV0,621056
+bigdl/cpp/libs/server.exe,sha256=vL9-altaO3mkhbyJDwpHXQliG9HJ5TFnjeUxEyMqUvc,1682432
+bigdl/cpp/libs/simple.exe,sha256=ABlsMXwln3t8t1oBXQBStYLH_pfjoCqnwQ1J4Bkcogg,200704
+bigdl/cpp/libs/speculative.exe,sha256=LpQQP8HEjpIYwff9-tQelhkN394k0iiYOGO2LagI0Qc,694784
+bigdl/cpp/libs/tokenize.exe,sha256=O99zmZEFlHYcCx0eRxWlqEY-JnOxPe-uRzDLOqzunwU,180224
+bigdl/cpp/libs/train-text-from-scratch.exe,sha256=RdDAH0gHsXpOaIWfvQJuKi_-E_HPcJctbssMbOSalOw,281600
+bigdl/cpp/libs/vdot.exe,sha256=qRe_Q0hVWNMrE0wYOWrqnT-gkb_JJXvtFrY87FV1kK4,56320
+bigdl_core_cpp-2.5.0b20240509.data/scripts/init-llama-cpp.bat,sha256=13AgSYRyzk6hlSz11dPyCMJeSoW9z8PZ9l85E_5GWxs,528
+bigdl_core_cpp-2.5.0b20240509.data/scripts/init-llama-cpp.ps1,sha256=JFOylLxO4MKpllHhdbPuJ1xHi9azxDpzdJns8JtZpkU,501
+bigdl_core_cpp-2.5.0b20240509.data/scripts/init-ollama.bat,sha256=OVX62Q7KH67z6rQrgQUwEc19E3eiAH-8HAIycDmySYc,350
+bigdl_core_cpp-2.5.0b20240509.dist-info/METADATA,sha256=9iisfcxQYJKB5XgFheM8-6Ki-tcW9s0SvCOji54qi2Y,668
+bigdl_core_cpp-2.5.0b20240509.dist-info/WHEEL,sha256=at4xwl6JdXdkZHxdo5ixTwJ7ENtVftSy2wqmsdmo_4U,98
+bigdl_core_cpp-2.5.0b20240509.dist-info/top_level.txt,sha256=iGuLfZARD_qANcIMfy0tbbrC3EtCg6BSiH8icc3dLWs,6
+bigdl_core_cpp-2.5.0b20240509.dist-info/RECORD,,