bigdl-core-cpp 2.5.0b20240421__py3-none-win_amd64.whl → 2.5.0b20240423__py3-none-win_amd64.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
Files changed (50)
  1. bigdl/cpp/convert.py +276 -189
  2. bigdl/cpp/gguf-py/__init__.py +0 -0
  3. bigdl/cpp/gguf-py/gguf/__init__.py +5 -0
  4. bigdl/cpp/gguf-py/gguf/constants.py +943 -0
  5. bigdl/cpp/gguf-py/gguf/gguf.py +15 -0
  6. bigdl/cpp/gguf-py/gguf/gguf_reader.py +279 -0
  7. bigdl/cpp/gguf-py/gguf/gguf_writer.py +518 -0
  8. bigdl/cpp/gguf-py/gguf/tensor_mapping.py +434 -0
  9. bigdl/cpp/gguf-py/gguf/vocab.py +181 -0
  10. bigdl/cpp/libs/baby-llama.exe +0 -0
  11. bigdl/cpp/libs/batched-bench.exe +0 -0
  12. bigdl/cpp/libs/batched.exe +0 -0
  13. bigdl/cpp/libs/beam-search.exe +0 -0
  14. bigdl/cpp/libs/benchmark.exe +0 -0
  15. bigdl/cpp/libs/convert-llama2c-to-ggml.exe +0 -0
  16. bigdl/cpp/libs/embedding.exe +0 -0
  17. bigdl/cpp/libs/export-lora.exe +0 -0
  18. bigdl/cpp/libs/finetune.exe +0 -0
  19. bigdl/cpp/libs/gguf.exe +0 -0
  20. bigdl/cpp/libs/gritlm.exe +0 -0
  21. bigdl/cpp/libs/imatrix.exe +0 -0
  22. bigdl/cpp/libs/infill.exe +0 -0
  23. bigdl/cpp/libs/llama-bench.exe +0 -0
  24. bigdl/cpp/libs/llava-cli.exe +0 -0
  25. bigdl/cpp/libs/lookahead.exe +0 -0
  26. bigdl/cpp/libs/lookup.exe +0 -0
  27. bigdl/cpp/libs/ls-sycl-device.exe +0 -0
  28. bigdl/cpp/libs/main.exe +0 -0
  29. bigdl/cpp/libs/ollama.exe +0 -0
  30. bigdl/cpp/libs/parallel.exe +0 -0
  31. bigdl/cpp/libs/passkey.exe +0 -0
  32. bigdl/cpp/libs/perplexity.exe +0 -0
  33. bigdl/cpp/libs/q8dot.exe +0 -0
  34. bigdl/cpp/libs/quantize-stats.exe +0 -0
  35. bigdl/cpp/libs/quantize.exe +0 -0
  36. bigdl/cpp/libs/save-load-state.exe +0 -0
  37. bigdl/cpp/libs/server.exe +0 -0
  38. bigdl/cpp/libs/simple.exe +0 -0
  39. bigdl/cpp/libs/speculative.exe +0 -0
  40. bigdl/cpp/libs/tokenize.exe +0 -0
  41. bigdl/cpp/libs/train-text-from-scratch.exe +0 -0
  42. bigdl/cpp/libs/vdot.exe +0 -0
  43. {bigdl_core_cpp-2.5.0b20240421.data → bigdl_core_cpp-2.5.0b20240423.data}/scripts/init-llama-cpp.bat +1 -0
  44. {bigdl_core_cpp-2.5.0b20240421.dist-info → bigdl_core_cpp-2.5.0b20240423.dist-info}/METADATA +3 -3
  45. bigdl_core_cpp-2.5.0b20240423.dist-info/RECORD +50 -0
  46. bigdl_core_cpp-2.5.0b20240421.dist-info/RECORD +0 -42
  47. {bigdl_core_cpp-2.5.0b20240421.data → bigdl_core_cpp-2.5.0b20240423.data}/scripts/init-llama-cpp.ps1 +0 -0
  48. {bigdl_core_cpp-2.5.0b20240421.data → bigdl_core_cpp-2.5.0b20240423.data}/scripts/init-ollama.bat +0 -0
  49. {bigdl_core_cpp-2.5.0b20240421.dist-info → bigdl_core_cpp-2.5.0b20240423.dist-info}/WHEEL +0 -0
  50. {bigdl_core_cpp-2.5.0b20240421.dist-info → bigdl_core_cpp-2.5.0b20240423.dist-info}/top_level.txt +0 -0
bigdl/cpp/gguf-py/gguf/gguf.py
@@ -0,0 +1,15 @@
+ # This file left for compatibility. If you want to use the GGUF API from Python
+ # then don't import gguf/gguf.py directly. If you're looking for examples, see the
+ # examples/ directory for gguf-py
+
+ import importlib
+ import sys
+ from pathlib import Path
+
+ sys.path.insert(0, str(Path(__file__).parent.parent))
+
+ # Compatibility for people trying to import gguf/gguf.py directly instead of as a package.
+ importlib.invalidate_caches()
+ import gguf # noqa: E402
+
+ importlib.reload(gguf)
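
The shim above only exists so that legacy imports of gguf/gguf.py keep resolving; new code is meant to import the package modules directly. A minimal sketch of the two import styles (nothing here is specific to this wheel beyond the module layout shown in the file list):

    # Preferred: import from the package modules added in this wheel.
    from gguf.gguf_reader import GGUFReader  # defined in gguf/gguf_reader.py below

    # Legacy: importing gguf/gguf.py still works; the shim puts the package root on
    # sys.path and re-imports/reloads the gguf package for callers that relied on it.
    import gguf.gguf  # noqa: F401
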
bigdl/cpp/gguf-py/gguf/gguf_reader.py
@@ -0,0 +1,279 @@
+ #
+ # GGUF file reading/modification support. For API usage information,
+ # please see the files scripts/ for some fairly simple examples.
+ #
+ from __future__ import annotations
+
+ import os
+ from collections import OrderedDict
+ from typing import Any, Literal, NamedTuple, TypeVar, Union
+
+ import numpy as np
+ import numpy.typing as npt
+
+ if __name__ == "__main__":
+     import sys
+     from pathlib import Path
+
+     # Allow running file in package as a script.
+     sys.path.insert(0, str(Path(__file__).parent.parent))
+
+ from gguf.constants import (
+     GGML_QUANT_SIZES,
+     GGUF_DEFAULT_ALIGNMENT,
+     GGUF_MAGIC,
+     GGUF_VERSION,
+     GGMLQuantizationType,
+     GGUFValueType,
+ )
+
+
+ READER_SUPPORTED_VERSIONS = [2, GGUF_VERSION]
+
+
+ class ReaderField(NamedTuple):
+     # Offset to start of this field.
+     offset: int
+
+     # Name of the field (not necessarily from file data).
+     name: str
+
+     # Data parts. Some types have multiple components, such as strings
+     # that consist of a length followed by the string data.
+     parts: list[npt.NDArray[Any]] = []
+
+     # Indexes into parts that we can call the actual data. For example
+     # an array of strings will be populated with indexes to the actual
+     # string data.
+     data: list[int] = [-1]
+
+     types: list[GGUFValueType] = []
+
+
+ class ReaderTensor(NamedTuple):
+     name: str
+     tensor_type: GGMLQuantizationType
+     shape: npt.NDArray[np.uint32]
+     n_elements: int
+     n_bytes: int
+     data_offset: int
+     data: npt.NDArray[Any]
+     field: ReaderField
+
+
+ class GGUFReader:
+     # I - same as host, S - swapped
+     byte_order: Literal['I' | 'S'] = 'I'
+     alignment: int = GGUF_DEFAULT_ALIGNMENT
+
+     # Note: Internal helper, API may change.
+     gguf_scalar_to_np: dict[GGUFValueType, type[np.generic]] = {
+         GGUFValueType.UINT8: np.uint8,
+         GGUFValueType.INT8: np.int8,
+         GGUFValueType.UINT16: np.uint16,
+         GGUFValueType.INT16: np.int16,
+         GGUFValueType.UINT32: np.uint32,
+         GGUFValueType.INT32: np.int32,
+         GGUFValueType.FLOAT32: np.float32,
+         GGUFValueType.UINT64: np.uint64,
+         GGUFValueType.INT64: np.int64,
+         GGUFValueType.FLOAT64: np.float64,
+         GGUFValueType.BOOL: np.bool_,
+     }
+
+     def __init__(self, path: os.PathLike[str] | str, mode: Literal['r' | 'r+' | 'c'] = 'r'):
+         self.data = np.memmap(path, mode = mode)
+         offs = 0
+         if self._get(offs, np.uint32, override_order = '<')[0] != GGUF_MAGIC:
+             raise ValueError('GGUF magic invalid')
+         offs += 4
+         temp_version = self._get(offs, np.uint32)
+         if temp_version[0] & 65535 == 0:
+             # If we get 0 here that means it's (probably) a GGUF file created for
+             # the opposite byte order of the machine this script is running on.
+             self.byte_order = 'S'
+             temp_version = temp_version.newbyteorder(self.byte_order)
+         version = temp_version[0]
+         if version not in READER_SUPPORTED_VERSIONS:
+             raise ValueError(f'Sorry, file appears to be version {version} which we cannot handle')
+         self.fields: OrderedDict[str, ReaderField] = OrderedDict()
+         self.tensors: list[ReaderTensor] = []
+         offs += self._push_field(ReaderField(offs, 'GGUF.version', [temp_version], [0], [GGUFValueType.UINT32]))
+         temp_counts = self._get(offs, np.uint64, 2)
+         offs += self._push_field(ReaderField(offs, 'GGUF.tensor_count', [temp_counts[:1]], [0], [GGUFValueType.UINT64]))
+         offs += self._push_field(ReaderField(offs, 'GGUF.kv_count', [temp_counts[1:]], [0], [GGUFValueType.UINT64]))
+         tensor_count, kv_count = temp_counts
+         offs = self._build_fields(offs, kv_count)
+         offs, tensors_fields = self._build_tensors_fields(offs, tensor_count)
+         new_align = self.fields.get('general.alignment')
+         if new_align is not None:
+             if new_align.types != [GGUFValueType.UINT32]:
+                 raise ValueError('Bad type for general.alignment field')
+             self.alignment = new_align.parts[-1][0]
+         padding = offs % self.alignment
+         if padding != 0:
+             offs += self.alignment - padding
+         self._build_tensors(offs, tensors_fields)
+
+     _DT = TypeVar('_DT', bound = npt.DTypeLike)
+
+     # Fetch a key/value metadata field by key.
+     def get_field(self, key: str) -> Union[ReaderField, None]:
+         return self.fields.get(key, None)
+
+     # Fetch a tensor from the list by index.
+     def get_tensor(self, idx: int) -> ReaderTensor:
+         return self.tensors[idx]
+
+     def _get(
+         self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I' | 'S' | '<'] = None,
+     ) -> npt.NDArray[Any]:
+         count = int(count)
+         itemsize = int(np.empty([], dtype = dtype).itemsize)
+         end_offs = offset + itemsize * count
+         return (
+             self.data[offset:end_offs]
+             .view(dtype = dtype)[:count]
+             .newbyteorder(override_order or self.byte_order)
+         )
+
+     def _push_field(self, field: ReaderField, skip_sum: bool = False) -> int:
+         if field.name in self.fields:
+             raise KeyError(f'Duplicate {field.name} already in list at offset {field.offset}')
+         self.fields[field.name] = field
+         return 0 if skip_sum else sum(int(part.nbytes) for part in field.parts)
+
+     def _get_str(self, offset: int) -> tuple[npt.NDArray[np.uint64], npt.NDArray[np.uint8]]:
+         slen = self._get(offset, np.uint64)
+         return slen, self._get(offset + 8, np.uint8, slen[0])
+
+     def _get_field_parts(
+         self, orig_offs: int, raw_type: int,
+     ) -> tuple[int, list[npt.NDArray[Any]], list[int], list[GGUFValueType]]:
+         offs = orig_offs
+         types: list[GGUFValueType] = []
+         gtype = GGUFValueType(raw_type)
+         types.append(gtype)
+         # Handle strings.
+         if gtype == GGUFValueType.STRING:
+             sparts: list[npt.NDArray[Any]] = list(self._get_str(offs))
+             size = sum(int(part.nbytes) for part in sparts)
+             return size, sparts, [1], types
+         # Check if it's a simple scalar type.
+         nptype = self.gguf_scalar_to_np.get(gtype)
+         if nptype is not None:
+             val = self._get(offs, nptype)
+             return int(val.nbytes), [val], [0], types
+         # Handle arrays.
+         if gtype == GGUFValueType.ARRAY:
+             raw_itype = self._get(offs, np.uint32)
+             offs += int(raw_itype.nbytes)
+             alen = self._get(offs, np.uint64)
+             offs += int(alen.nbytes)
+             aparts: list[npt.NDArray[Any]] = [raw_itype, alen]
+             data_idxs: list[int] = []
+             for idx in range(alen[0]):
+                 curr_size, curr_parts, curr_idxs, curr_types = self._get_field_parts(offs, raw_itype[0])
+                 if idx == 0:
+                     types += curr_types
+                 idxs_offs = len(aparts)
+                 aparts += curr_parts
+                 data_idxs += (idx + idxs_offs for idx in curr_idxs)
+                 offs += curr_size
+             return offs - orig_offs, aparts, data_idxs, types
+         # We can't deal with this one.
+         raise ValueError('Unknown/unhandled field type {gtype}')
+
+     def _get_tensor(self, orig_offs: int) -> ReaderField:
+         offs = orig_offs
+         name_len, name_data = self._get_str(offs)
+         offs += int(name_len.nbytes + name_data.nbytes)
+         n_dims = self._get(offs, np.uint32)
+         offs += int(n_dims.nbytes)
+         dims = self._get(offs, np.uint64, n_dims[0])
+         offs += int(dims.nbytes)
+         raw_dtype = self._get(offs, np.uint32)
+         offs += int(raw_dtype.nbytes)
+         offset_tensor = self._get(offs, np.uint64)
+         offs += int(offset_tensor.nbytes)
+         return ReaderField(
+             orig_offs,
+             str(bytes(name_data), encoding = 'utf-8'),
+             [name_len, name_data, n_dims, dims, raw_dtype, offset_tensor],
+             [1, 3, 4, 5],
+         )
+
+     def _build_fields(self, offs: int, count: int) -> int:
+         for _ in range(count):
+             orig_offs = offs
+             kv_klen, kv_kdata = self._get_str(offs)
+             offs += int(kv_klen.nbytes + kv_kdata.nbytes)
+             raw_kv_type = self._get(offs, np.uint32)
+             offs += int(raw_kv_type.nbytes)
+             parts: list[npt.NDArray[Any]] = [kv_klen, kv_kdata, raw_kv_type]
+             idxs_offs = len(parts)
+             field_size, field_parts, field_idxs, field_types = self._get_field_parts(offs, raw_kv_type[0])
+             parts += field_parts
+             self._push_field(ReaderField(
+                 orig_offs,
+                 str(bytes(kv_kdata), encoding = 'utf-8'),
+                 parts,
+                 [idx + idxs_offs for idx in field_idxs],
+                 field_types,
+             ), skip_sum = True)
+             offs += field_size
+         return offs
+
+     def _build_tensors_fields(self, offs: int, count: int) -> tuple[int, list[ReaderField]]:
+         tensor_fields = []
+         for _ in range(count):
+             field = self._get_tensor(offs)
+             offs += sum(int(part.nbytes) for part in field.parts)
+             tensor_fields.append(field)
+         return offs, tensor_fields
+
+     def _build_tensors(self, start_offs: int, fields: list[ReaderField]) -> None:
+         tensors = []
+         for field in fields:
+             _name_len, name_data, _n_dims, dims, raw_dtype, offset_tensor = field.parts
+             ggml_type = GGMLQuantizationType(raw_dtype[0])
+             n_elems = np.prod(dims)
+             block_size, type_size = GGML_QUANT_SIZES[ggml_type]
+             n_bytes = n_elems * type_size // block_size
+             data_offs = int(start_offs + offset_tensor[0])
+             item_type: npt.DTypeLike
+             if ggml_type == GGMLQuantizationType.F16:
+                 item_count = n_elems
+                 item_type = np.float16
+             elif ggml_type == GGMLQuantizationType.F32:
+                 item_count = n_elems
+                 item_type = np.float32
+             elif ggml_type == GGMLQuantizationType.F64:
+                 item_count = n_elems
+                 item_type = np.float64
+             elif ggml_type == GGMLQuantizationType.I8:
+                 item_count = n_elems
+                 item_type = np.int8
+             elif ggml_type == GGMLQuantizationType.I16:
+                 item_count = n_elems
+                 item_type = np.int16
+             elif ggml_type == GGMLQuantizationType.I32:
+                 item_count = n_elems
+                 item_type = np.int32
+             elif ggml_type == GGMLQuantizationType.I64:
+                 item_count = n_elems
+                 item_type = np.int64
+             else:
+                 item_count = n_bytes
+                 item_type = np.uint8
+             tensors.append(ReaderTensor(
+                 name = str(bytes(name_data), encoding = 'utf-8'),
+                 tensor_type = ggml_type,
+                 shape = dims,
+                 n_elements = n_elems,
+                 n_bytes = n_bytes,
+                 data_offset = data_offs,
+                 data = self._get(data_offs, item_type, item_count),
+                 field = field,
+             ))
+         self.tensors = tensors
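
As a rough sketch of how the reader above is used (the file name is a placeholder, not an example shipped in the package), the parsed metadata and tensor table can be walked like this:

    from gguf.gguf_reader import GGUFReader

    reader = GGUFReader("model.gguf")  # placeholder path; the file is memory-mapped, read-only by default

    # Key/value metadata: an OrderedDict of ReaderField entries keyed by field name.
    for name, field in reader.fields.items():
        print(name, [t.name for t in field.types])

    # Tensor table: ReaderTensor tuples with name, type, shape and a memmapped data view.
    for tensor in reader.tensors:
        print(tensor.name, tensor.tensor_type.name, tensor.shape, tensor.n_bytes)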