bigdl-core-cpp 2.1.0b2__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. bigdl/cpp/__init__.py +0 -0
  2. bigdl/cpp/convert-hf-to-gguf.py +2856 -0
  3. bigdl/cpp/convert.py +1714 -0
  4. bigdl/cpp/gguf-py/__init__.py +0 -0
  5. bigdl/cpp/gguf-py/gguf/__init__.py +7 -0
  6. bigdl/cpp/gguf-py/gguf/constants.py +1033 -0
  7. bigdl/cpp/gguf-py/gguf/gguf.py +15 -0
  8. bigdl/cpp/gguf-py/gguf/gguf_reader.py +296 -0
  9. bigdl/cpp/gguf-py/gguf/gguf_writer.py +554 -0
  10. bigdl/cpp/gguf-py/gguf/lazy.py +236 -0
  11. bigdl/cpp/gguf-py/gguf/py.typed +0 -0
  12. bigdl/cpp/gguf-py/gguf/quants.py +123 -0
  13. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +463 -0
  14. bigdl/cpp/gguf-py/gguf/vocab.py +165 -0
  15. bigdl/cpp/libs/baby-llama.exe +0 -0
  16. bigdl/cpp/libs/batched-bench.exe +0 -0
  17. bigdl/cpp/libs/batched.exe +0 -0
  18. bigdl/cpp/libs/beam-search.exe +0 -0
  19. bigdl/cpp/libs/benchmark.exe +0 -0
  20. bigdl/cpp/libs/common.lib +0 -0
  21. bigdl/cpp/libs/convert-llama2c-to-ggml.exe +0 -0
  22. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu/ollama_llama_server.exe +0 -0
  23. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx/ollama_llama_server.exe +0 -0
  24. bigdl/cpp/libs/dist/windows-amd64/ollama_runners/cpu_avx2/ollama_llama_server.exe +0 -0
  25. bigdl/cpp/libs/embedding.exe +0 -0
  26. bigdl/cpp/libs/export-lora.exe +0 -0
  27. bigdl/cpp/libs/finetune.exe +0 -0
  28. bigdl/cpp/libs/ggml_shared.dll +0 -0
  29. bigdl/cpp/libs/gguf.exe +0 -0
  30. bigdl/cpp/libs/gritlm.exe +0 -0
  31. bigdl/cpp/libs/imatrix.exe +0 -0
  32. bigdl/cpp/libs/infill.exe +0 -0
  33. bigdl/cpp/libs/llama-bench.exe +0 -0
  34. bigdl/cpp/libs/llama.dll +0 -0
  35. bigdl/cpp/libs/llava-cli.exe +0 -0
  36. bigdl/cpp/libs/llava_shared.dll +0 -0
  37. bigdl/cpp/libs/lookahead.exe +0 -0
  38. bigdl/cpp/libs/lookup.exe +0 -0
  39. bigdl/cpp/libs/ls-sycl-device.exe +0 -0
  40. bigdl/cpp/libs/main.exe +0 -0
  41. bigdl/cpp/libs/ollama.exe +0 -0
  42. bigdl/cpp/libs/parallel.exe +0 -0
  43. bigdl/cpp/libs/passkey.exe +0 -0
  44. bigdl/cpp/libs/perplexity.exe +0 -0
  45. bigdl/cpp/libs/q8dot.exe +0 -0
  46. bigdl/cpp/libs/quantize-stats.exe +0 -0
  47. bigdl/cpp/libs/quantize.exe +0 -0
  48. bigdl/cpp/libs/save-load-state.exe +0 -0
  49. bigdl/cpp/libs/server.exe +0 -0
  50. bigdl/cpp/libs/simple.exe +0 -0
  51. bigdl/cpp/libs/speculative.exe +0 -0
  52. bigdl/cpp/libs/tokenize.exe +0 -0
  53. bigdl/cpp/libs/train-text-from-scratch.exe +0 -0
  54. bigdl/cpp/libs/vdot.exe +0 -0
  55. bigdl_core_cpp-2.1.0b2.data/scripts/init-llama-cpp.bat +19 -0
  56. bigdl_core_cpp-2.1.0b2.data/scripts/init-llama-cpp.ps1 +13 -0
  57. bigdl_core_cpp-2.1.0b2.data/scripts/init-ollama.bat +13 -0
  58. bigdl_core_cpp-2.1.0b2.dist-info/METADATA +18 -0
  59. bigdl_core_cpp-2.1.0b2.dist-info/RECORD +61 -0
  60. bigdl_core_cpp-2.1.0b2.dist-info/WHEEL +5 -0
  61. bigdl_core_cpp-2.1.0b2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,15 @@
1
# This file is kept only for backward compatibility. To use the GGUF API from
# Python, import the `gguf` package instead of importing gguf/gguf.py directly.
# For usage examples, see the examples/ directory of gguf-py.

import importlib
import sys
from pathlib import Path

# Put the parent of this file's directory at the front of the module search
# path so that the `import gguf` below can resolve from there.
sys.path.insert(0, str(Path(__file__).parent.parent))

# Compatibility for people trying to import gguf/gguf.py directly instead of as a package.
# Finder caches are invalidated first so the freshly inserted path entry is seen.
importlib.invalidate_caches()
import gguf  # noqa: E402

importlib.reload(gguf)
@@ -0,0 +1,296 @@
1
+ #
2
+ # GGUF file reading/modification support. For API usage information,
3
+ # please see the files in scripts/ for some fairly simple examples.
4
+ #
5
+ from __future__ import annotations
6
+
7
+ import logging
8
+ import os
9
+ from collections import OrderedDict
10
+ from typing import Any, Literal, NamedTuple, TypeVar, Union
11
+
12
+ import numpy as np
13
+ import numpy.typing as npt
14
+
15
+ from .quants import quant_shape_to_byte_shape
16
+
17
+ if __name__ == "__main__":
18
+ import sys
19
+ from pathlib import Path
20
+
21
+ # Allow running file in package as a script.
22
+ sys.path.insert(0, str(Path(__file__).parent.parent))
23
+
24
+ from gguf.constants import (
25
+ GGML_QUANT_SIZES,
26
+ GGUF_DEFAULT_ALIGNMENT,
27
+ GGUF_MAGIC,
28
+ GGUF_VERSION,
29
+ GGMLQuantizationType,
30
+ GGUFValueType,
31
+ )
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+ READER_SUPPORTED_VERSIONS = [2, GGUF_VERSION]
36
+
37
+
38
class ReaderField(NamedTuple):
    """A single record parsed from a GGUF file, kept as a group of array parts.

    NOTE: the list defaults below are evaluated once, when the class is
    created, so every instance that relies on a default refers to the same
    list object. Treat defaulted lists as read-only.
    """

    # Offset at which this field begins.
    offset: int

    # Field name (it does not have to originate from the file contents).
    name: str

    # The pieces that make up the field. Some types are stored as several
    # components; a string, for instance, is a length followed by its bytes.
    parts: list[npt.NDArray[Any]] = []

    # Positions within `parts` that hold the actual payload. For an array of
    # strings, for example, these point at each string's data part.
    data: list[int] = [-1]

    # Value type(s) recorded for this field.
    types: list[GGUFValueType] = []
55
+
56
+
57
class ReaderTensor(NamedTuple):
    """Description of one tensor found in a GGUF file, together with its data."""

    # Tensor name.
    name: str
    # Quantization / storage type of the tensor.
    tensor_type: GGMLQuantizationType
    # Tensor dimensions.
    shape: npt.NDArray[np.uint32]
    # Number of elements in the tensor.
    n_elements: int
    # Size of the tensor data in bytes.
    n_bytes: int
    # Offset at which the tensor data begins.
    data_offset: int
    # Array holding the tensor contents.
    data: npt.NDArray[Any]
    # The ReaderField carrying the raw parts of this tensor's info record.
    field: ReaderField
66
+
67
+
68
class GGUFReader:
    """Parser for GGUF files backed by a ``numpy.memmap``.

    Constructing a reader maps the file, checks the magic number and the
    format version, and then fills in:

    * ``fields``  -- ordered mapping from key name to ``ReaderField``; the
      header values are stored under the ``GGUF.version``,
      ``GGUF.tensor_count`` and ``GGUF.kv_count`` names,
    * ``tensors`` -- list of ``ReaderTensor`` objects whose ``data`` arrays
      are obtained from the mapped file.
    """

    # I - same as host, S - swapped
    byte_order: Literal['I'] | Literal['S'] = 'I'
    alignment: int = GGUF_DEFAULT_ALIGNMENT

    # Note: Internal helper, API may change.
    gguf_scalar_to_np: dict[GGUFValueType, type[np.generic]] = {
        GGUFValueType.UINT8:   np.uint8,
        GGUFValueType.INT8:    np.int8,
        GGUFValueType.UINT16:  np.uint16,
        GGUFValueType.INT16:   np.int16,
        GGUFValueType.UINT32:  np.uint32,
        GGUFValueType.INT32:   np.int32,
        GGUFValueType.FLOAT32: np.float32,
        GGUFValueType.UINT64:  np.uint64,
        GGUFValueType.INT64:   np.int64,
        GGUFValueType.FLOAT64: np.float64,
        GGUFValueType.BOOL:    np.bool_,
    }

    def __init__(self, path: os.PathLike[str] | str, mode: Literal['r'] | Literal['r+'] | Literal['c'] = 'r'):
        """Map ``path`` and parse its header, key/value fields and tensor infos.

        Args:
            path: Location of the GGUF file.
            mode: Mode passed through to ``numpy.memmap``.

        Raises:
            ValueError: If the magic number is wrong, the version is not in
                ``READER_SUPPORTED_VERSIONS``, ``general.alignment`` does not
                have type UINT32, or two tensors share a name.
        """
        self.data = np.memmap(path, mode = mode)
        offs = 0

        # The magic number is always compared using little-endian order.
        if self._get(offs, np.uint32, override_order = '<')[0] != GGUF_MAGIC:
            raise ValueError('GGUF magic invalid')
        offs += 4

        temp_version = self._get(offs, np.uint32)
        if temp_version[0] & 65535 == 0:
            # If we get 0 here that means it's (probably) a GGUF file created for
            # the opposite byte order of the machine this script is running on.
            self.byte_order = 'S'
            # ndarray.newbyteorder() no longer exists in NumPy 2.0; viewing the
            # array through a byte-swapped dtype is the equivalent operation.
            temp_version = temp_version.view(temp_version.dtype.newbyteorder(self.byte_order))
        version = temp_version[0]
        if version not in READER_SUPPORTED_VERSIONS:
            raise ValueError(f'Sorry, file appears to be version {version} which we cannot handle')

        self.fields: OrderedDict[str, ReaderField] = OrderedDict()
        self.tensors: list[ReaderTensor] = []
        offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32]))

        # The tensor count and the key/value count are stored back to back.
        temp_counts = self._get(offs, np.uint64, 2)
        offs += self._push_field(ReaderField(offs, 'GGUF.tensor_count', [temp_counts[:1]], [0], [GGUFValueType.UINT64]))
        offs += self._push_field(ReaderField(offs, 'GGUF.kv_count', [temp_counts[1:]], [0], [GGUFValueType.UINT64]))
        tensor_count, kv_count = temp_counts

        offs = self._build_fields(offs, kv_count)
        offs, tensors_fields = self._build_tensors_fields(offs, tensor_count)

        # A 'general.alignment' key in the file overrides the default alignment.
        new_align = self.fields.get('general.alignment')
        if new_align is not None:
            if new_align.types != [GGUFValueType.UINT32]:
                raise ValueError('Bad type for general.alignment field')
            self.alignment = new_align.parts[-1][0]

        # Round the offset up to the next multiple of the alignment before
        # resolving tensor data offsets.
        padding = offs % self.alignment
        if padding != 0:
            offs += self.alignment - padding
        self._build_tensors(offs, tensors_fields)

    _DT = TypeVar('_DT', bound = npt.DTypeLike)

    # Fetch a key/value metadata field by key.
    def get_field(self, key: str) -> Union[ReaderField, None]:
        """Return the field stored under ``key``, or ``None`` if it is absent."""
        return self.fields.get(key, None)

    # Fetch a tensor from the list by index.
    def get_tensor(self, idx: int) -> ReaderTensor:
        """Return the tensor at position ``idx`` of ``self.tensors``."""
        return self.tensors[idx]

    def _get(
        self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I'] | Literal['S'] | Literal['<'] = None,
    ) -> npt.NDArray[Any]:
        """Return ``count`` items of ``dtype`` starting at byte ``offset``.

        The result is a view of ``self.data`` interpreted with
        ``override_order`` when given, otherwise with ``self.byte_order``.
        """
        count = int(count)
        itemsize = int(np.empty([], dtype = dtype).itemsize)
        end_offs = offset + itemsize * count
        arr = self.data[offset:end_offs].view(dtype = dtype)[:count]
        # ndarray.newbyteorder() was removed in NumPy 2.0. Re-viewing through a
        # dtype carrying the requested byte order behaves the same on 1.x and 2.x.
        return arr.view(arr.dtype.newbyteorder(override_order or self.byte_order))

    def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
        """Register ``field`` in ``self.fields`` and return its size in bytes.

        A name that is already present is logged as a warning and the field is
        stored under ``<name>_<offset>`` instead. When ``skip_sum`` is true the
        size is not computed and 0 is returned.
        """
        if field.name in self.fields:
            # TODO: add option to generate error on duplicate keys
            # raise KeyError(f'Duplicate {field.name} already in list at offset {field.offset}')

            logger.warning(f'Duplicate key {field.name} at offset {field.offset}')
            self.fields[f'{field.name}_{field.offset}'] = field
        else:
            self.fields[field.name] = field
        if skip_sum:
            return 0
        return sum(int(part.nbytes) for part in field.parts)

    def _get_str(self, offset: int) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
        """Read a string at ``offset``: a uint64 length, then that many bytes.

        Returns the ``(length, data)`` arrays.
        """
        slen = self._get(offset, np.uint64)
        return slen, self._get(offset + 8, np.uint8, slen[0])

    def _get_field_parts(
        self, orig_offs: int, raw_type: int,
    ) -> tuple[int, list[npt.NDArray[Any]], list[int], list[GGUFValueType]]:
        """Decode the value of type ``raw_type`` located at ``orig_offs``.

        Returns:
            ``(size, parts, data_idxs, types)``: the number of bytes consumed,
            the arrays making up the value, the indexes into ``parts`` that
            hold the payload, and the value type followed (for arrays) by the
            element type(s) of the first item.

        Raises:
            ValueError: If the type cannot be handled.
        """
        offs = orig_offs
        types: list[GGUFValueType] = []
        gtype = GGUFValueType(raw_type)
        types.append(gtype)

        # Handle strings.
        if gtype == GGUFValueType.STRING:
            sparts: list[npt.NDArray[Any]] = list(self._get_str(offs))
            size = sum(int(part.nbytes) for part in sparts)
            return size, sparts, [1], types

        # Check if it's a simple scalar type.
        nptype = self.gguf_scalar_to_np.get(gtype)
        if nptype is not None:
            val = self._get(offs, nptype)
            return int(val.nbytes), [val], [0], types

        # Handle arrays: element type, element count, then the items themselves.
        if gtype == GGUFValueType.ARRAY:
            raw_itype = self._get(offs, np.uint32)
            offs += int(raw_itype.nbytes)
            alen = self._get(offs, np.uint64)
            offs += int(alen.nbytes)
            aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
            data_idxs: list[int] = []
            for item_no in range(alen[0]):
                curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0])
                if item_no == 0:
                    # Every item has the same type, so record it only once.
                    types += curr_types
                # Item-local indexes are shifted to positions within `aparts`.
                idxs_offs = len(aparts)
                aparts += curr_parts
                data_idxs += (part_idx + idxs_offs for part_idx in curr_idxs)
                offs += curr_size
            return offs - orig_offs, aparts, data_idxs, types

        # We can't deal with this one.
        raise ValueError(f'Unknown/unhandled field type {gtype}')

    def _get_tensor(self, orig_offs: int) -> ReaderField:
        """Read one tensor-info record at ``orig_offs`` and wrap it in a ReaderField.

        The parts are, in order: name length, name bytes, dimension count,
        dimensions, raw dtype and tensor data offset.
        """
        offs = orig_offs
        name_len, name_data = self._get_str(offs)
        offs += int(name_len.nbytes + name_data.nbytes)
        n_dims = self._get(offs, np.uint32)
        offs += int(n_dims.nbytes)
        dims = self._get(offs, np.uint64, n_dims[0])
        offs += int(dims.nbytes)
        raw_dtype = self._get(offs, np.uint32)
        offs += int(raw_dtype.nbytes)
        offset_tensor = self._get(offs, np.uint64)
        offs += int(offset_tensor.nbytes)
        return ReaderField(
            orig_offs,
            str(bytes(name_data), encoding = 'utf-8'),
            [name_len, name_data, n_dims, dims, raw_dtype, offset_tensor],
            [1, 3, 4, 5],
        )

    def _build_fields(self, offs: int, count: int) -> int:
        """Parse ``count`` key/value fields starting at ``offs``.

        Each field is registered through ``_push_field``. Returns the offset
        just past the last field.
        """
        for _ in range(count):
            orig_offs = offs
            kv_klen, kv_kdata = self._get_str(offs)
            offs += int(kv_klen.nbytes + kv_kdata.nbytes)
            raw_kv_type = self._get(offs, np.uint32)
            offs += int(raw_kv_type.nbytes)
            parts: list[npt.NDArray[Any]] = [kv_klen, kv_kdata, raw_kv_type]
            # Value indexes are relative to the value's own parts, so they are
            # shifted past the key and type parts that precede them.
            idxs_offs = len(parts)
            field_size, field_parts, field_idxs, field_types = self._get_field_parts(offs, raw_kv_type[0])
            parts += field_parts
            self._push_field(ReaderField(
                orig_offs,
                str(bytes(kv_kdata), encoding = 'utf-8'),
                parts,
                [idx + idxs_offs for idx in field_idxs],
                field_types,
            ), skip_sum = True)
            offs += field_size
        return offs

    def _build_tensors_fields(self, offs: int, count: int) -> tuple[int, list[ReaderField]]:
        """Parse ``count`` tensor-info records starting at ``offs``.

        Returns the offset just past the last record together with the list
        of parsed records.
        """
        tensor_fields = []
        for _ in range(count):
            field = self._get_tensor(offs)
            offs += sum(int(part.nbytes) for part in field.parts)
            tensor_fields.append(field)
        return offs, tensor_fields

    def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
        """Create a ``ReaderTensor`` for every record in ``fields``.

        Args:
            start_offs: Base offset that each tensor's own offset is added to.
            fields: Tensor-info records as produced by ``_get_tensor``.

        Raises:
            ValueError: If two records carry the same tensor name.
        """
        # Types stored as plain NumPy scalars; every other type is exposed as
        # raw bytes with a byte-oriented shape.
        plain_dtypes: dict[GGMLQuantizationType, npt.DTypeLike] = {
            GGMLQuantizationType.F16: np.float16,
            GGMLQuantizationType.F32: np.float32,
            GGMLQuantizationType.F64: np.float64,
            GGMLQuantizationType.I8:  np.int8,
            GGMLQuantizationType.I16: np.int16,
            GGMLQuantizationType.I32: np.int32,
            GGMLQuantizationType.I64: np.int64,
        }

        tensors = []
        tensor_names = set() # keep track of name to prevent duplicated tensors
        for field in fields:
            _name_len, name_data, _n_dims, dims, raw_dtype, offset_tensor = field.parts
            # check if there's any tensor having same name already in the list
            tensor_name = str(bytes(name_data), encoding = 'utf-8')
            if tensor_name in tensor_names:
                raise ValueError(f'Found duplicated tensor with name {tensor_name}')
            tensor_names.add(tensor_name)

            ggml_type = GGMLQuantizationType(raw_dtype[0])
            n_elems = int(np.prod(dims))
            # The dimensions are reversed to obtain the NumPy shape.
            np_dims = tuple(reversed(dims.tolist()))
            block_size, type_size = GGML_QUANT_SIZES[ggml_type]
            n_bytes = n_elems * type_size // block_size
            data_offs = int(start_offs + offset_tensor[0])

            item_type: npt.DTypeLike
            plain_dtype = plain_dtypes.get(ggml_type)
            if plain_dtype is not None:
                item_count = n_elems
                item_type = plain_dtype
            else:
                item_count = n_bytes
                item_type = np.uint8
                np_dims = quant_shape_to_byte_shape(np_dims, ggml_type)

            tensors.append(ReaderTensor(
                name = tensor_name,
                tensor_type = ggml_type,
                shape = dims,
                n_elements = n_elems,
                n_bytes = n_bytes,
                data_offset = data_offs,
                data = self._get(data_offs, item_type, item_count).reshape(np_dims),
                field = field,
            ))
        self.tensors = tensors