safetensors 0.7.0__pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
safetensors/paddle.py ADDED
@@ -0,0 +1,290 @@
1
+ import os
2
+ import sys
3
+ from typing import Any, Dict, Optional, Union
4
+
5
+ import numpy as np
6
+ import paddle
7
+
8
+ from safetensors import numpy, deserialize, safe_open, serialize, serialize_file
9
+
10
+
11
def save(
    tensors: Dict[str, paddle.Tensor], metadata: Optional[Dict[str, str]] = None
) -> bytes:
    """
    Serializes a dictionary of paddle tensors to bytes in the safetensors format.

    Args:
        tensors (`Dict[str, paddle.Tensor]`):
            The incoming tensors. Tensors need to be contiguous and dense.
        metadata (`Dict[str, str]`, *optional*, defaults to `None`):
            Optional text only metadata you might want to save in your header.
            For instance it can be useful to specify more about the underlying
            tensors. This is purely informative and does not affect tensor loading.

    Returns:
        `bytes`: The raw bytes representing the format

    Example:

    ```python
    from safetensors.paddle import save
    import paddle

    tensors = {"embedding": paddle.zeros((512, 1024)), "attention": paddle.zeros((256, 256))}
    byte_data = save(tensors)
    ```
    """
    # `_flatten` validates the input and turns every tensor into a
    # dtype/shape/data description that `serialize` understands.
    flattened = _flatten(tensors)
    return bytes(serialize(flattened, metadata=metadata))
41
+
42
+
43
def save_file(
    tensors: Dict[str, paddle.Tensor],
    filename: Union[str, os.PathLike],
    metadata: Optional[Dict[str, str]] = None,
) -> None:
    """
    Saves a dictionary of tensors to a file in safetensors format.

    Args:
        tensors (`Dict[str, paddle.Tensor]`):
            The incoming tensors. Tensors need to be contiguous and dense.
        filename (`str`, or `os.PathLike`):
            The filename we're saving into.
        metadata (`Dict[str, str]`, *optional*, defaults to `None`):
            Optional text only metadata you might want to save in your header.
            For instance it can be useful to specify more about the underlying
            tensors. This is purely informative and does not affect tensor loading.

    Returns:
        `None`

    Example:

    ```python
    from safetensors.paddle import save_file
    import paddle

    tensors = {"embedding": paddle.zeros((512, 1024)), "attention": paddle.zeros((256, 256))}
    save_file(tensors, "model.safetensors")
    ```
    """
    # `_flatten` validates the input and turns every tensor into a
    # dtype/shape/data description that `serialize_file` understands.
    flattened = _flatten(tensors)
    serialize_file(flattened, filename, metadata=metadata)
75
+
76
+
77
def _paddle_version_at_least(version: str, minimum: tuple) -> bool:
    """
    Tells whether a dotted version string is numerically at least `minimum`.

    Version strings must not be compared as plain strings: lexicographically
    `"3.10.0" < "3.2.0"` and `"10.0.0" < "3.2.0"`, which would wrongly send
    newer releases down the legacy code path.

    Args:
        version (`str`):
            A dotted version string such as `"3.2.0"` or `"3.2.0rc1"`.
        minimum (`tuple` of `int`):
            The minimum release, e.g. `(3, 2, 0)`.

    Returns:
        `bool`: `True` when the leading numeric components of `version` are
        greater than or equal to `minimum`.
    """
    components = []
    for piece in version.split(".")[: len(minimum)]:
        # Keep only the leading digits so suffixes like "0rc1" still parse.
        digits = ""
        for char in piece:
            if not "0" <= char <= "9":
                break
            digits += char
        if not digits:
            break
        components.append(int(digits))
    # Missing or unparsable trailing components count as zero.
    components.extend([0] * (len(minimum) - len(components)))
    return tuple(components) >= tuple(minimum)


def load(data: bytes, device: str = "cpu") -> Dict[str, paddle.Tensor]:
    """
    Loads a safetensors file into paddle format from pure bytes.

    Args:
        data (`bytes`):
            The content of a safetensors file
        device (`str`, *optional*, defaults to `cpu`):
            The device where the tensors need to be located after load.

    Returns:
        `Dict[str, paddle.Tensor]`: dictionary that contains name as key, value as `paddle.Tensor`

    Example:

    ```python
    from safetensors.paddle import load

    file_path = "./my_folder/bert.safetensors"
    with open(file_path, "rb") as f:
        data = f.read()

    loaded = load(data)
    ```
    """
    if _paddle_version_at_least(paddle.__version__, (3, 2, 0)):
        # Recent paddle: build the tensors directly from the raw buffers.
        return _view2paddle(deserialize(data), device)
    # Older paddle: go through numpy arrays first.
    return _np2paddle(numpy.load(data), device)


def load_file(
    filename: Union[str, os.PathLike], device="cpu"
) -> Dict[str, paddle.Tensor]:
    """
    Loads a safetensors file into paddle format.

    Args:
        filename (`str`, or `os.PathLike`):
            The name of the file which contains the tensors
        device (`Union[Dict[str, any], str]`, *optional*, defaults to `cpu`):
            The device where the tensors need to be located after load.
            available options are all regular paddle device locations

    Returns:
        `Dict[str, paddle.Tensor]`: dictionary that contains name as key, value as `paddle.Tensor`

    Example:

    ```python
    from safetensors.paddle import load_file

    file_path = "./my_folder/bert.safetensors"
    loaded = load_file(file_path)
    ```
    """
    if _paddle_version_at_least(paddle.__version__, (3, 2, 0)):
        result = {}
        with safe_open(filename, framework="paddle", device=device) as f:
            # Read the tensors in the order given by `offset_keys()`.
            for k in f.offset_keys():
                result[k] = f.get_tensor(k)
        return result
    # Older paddle: go through numpy arrays first.
    return _np2paddle(numpy.load_file(filename), device)
142
+
143
+
144
def _np2paddle(
    numpy_dict: Dict[str, np.ndarray], device: str = "cpu"
) -> Dict[str, paddle.Tensor]:
    """
    Converts a dictionary of numpy arrays into paddle tensors placed on `device`.

    A new dictionary is returned; `numpy_dict` itself is left untouched so the
    caller's mapping is never rewritten behind its back.

    Args:
        numpy_dict (`Dict[str, np.ndarray]`):
            The arrays to convert, keyed by tensor name.
        device (`str`, *optional*, defaults to `cpu`):
            Forwarded as `place` to `paddle.to_tensor`.

    Returns:
        `Dict[str, paddle.Tensor]`: same keys, values converted to `paddle.Tensor`
    """
    return {
        name: paddle.to_tensor(array, place=device)
        for name, array in numpy_dict.items()
    }
150
+
151
+
152
def _paddle2np(paddle_dict: Dict[str, paddle.Tensor]) -> Dict[str, np.ndarray]:
    """
    Converts a dictionary of paddle tensors into numpy arrays.

    Each tensor is detached and moved to cpu before conversion. A new
    dictionary is returned; `paddle_dict` itself is left untouched so the
    caller keeps its original tensors.

    Args:
        paddle_dict (`Dict[str, paddle.Tensor]`):
            The tensors to convert, keyed by tensor name.

    Returns:
        `Dict[str, np.ndarray]`: same keys, values converted to numpy arrays
    """
    return {
        name: tensor.detach().cpu().numpy() for name, tensor in paddle_dict.items()
    }
156
+
157
+
158
# Width in bytes of a single element, per paddle dtype. Used by `_tobytes`
# to compute how many bytes to read from a tensor's buffer.
_SIZE = {
    paddle.int64: 8,
    paddle.float32: 4,
    paddle.int32: 4,
    paddle.bfloat16: 2,
    paddle.float16: 2,
    paddle.int16: 2,
    paddle.uint8: 1,
    paddle.int8: 1,
    paddle.bool: 1,
    paddle.float64: 8,
    paddle.float8_e4m3fn: 1,
    paddle.float8_e5m2: 1,
    paddle.complex64: 8,
    # XXX: These are not supported yet in paddle
    # paddle.uint64: 8,
    # paddle.uint32: 4,
    # paddle.uint16: 2,
    # paddle.float8_e8m0: 1,
    # paddle.float4_e2m1_x2: 1,
}

# safetensors dtype name -> paddle dtype. Used by `_getdtype` when loading.
_TYPES = {
    "F64": paddle.float64,
    "F32": paddle.float32,
    "F16": paddle.float16,
    "BF16": paddle.bfloat16,
    "I64": paddle.int64,
    "I32": paddle.int32,
    "I16": paddle.int16,
    "I8": paddle.int8,
    "U8": paddle.uint8,
    "BOOL": paddle.bool,
    "F8_E4M3": paddle.float8_e4m3fn,
    "F8_E5M2": paddle.float8_e5m2,
    # Kept in sync with `_SIZE`, which already allows saving complex64.
    "C64": paddle.complex64,
}

# paddle dtype -> numpy dtype used only to byteswap buffers on big-endian hosts.
NPDTYPES = {
    paddle.int64: np.int64,
    paddle.float32: np.float32,
    paddle.int32: np.int32,
    # XXX: This is ok because both have the same width
    paddle.bfloat16: np.float16,
    paddle.float16: np.float16,
    paddle.int16: np.int16,
    paddle.uint8: np.uint8,
    paddle.int8: np.int8,
    paddle.bool: bool,
    paddle.float64: np.float64,
    # XXX: This is ok because both have the same width and byteswap is a no-op anyway
    paddle.float8_e4m3fn: np.uint8,
    paddle.float8_e5m2: np.uint8,
    # Kept in sync with `_SIZE`; without this entry `_tobytes` fails with a
    # KeyError for complex64 tensors on big-endian hosts.
    paddle.complex64: np.complex64,
}
211
+
212
+
213
def _getdtype(dtype_str: str) -> paddle.dtype:
    """
    Maps a safetensors dtype name (a key of `_TYPES`, e.g. `"F32"`) to the
    corresponding paddle dtype.

    Raises:
        `KeyError`: if `dtype_str` has no entry in `_TYPES`.
    """
    paddle_dtype = _TYPES[dtype_str]
    return paddle_dtype
215
+
216
+
217
def _view2paddle(safeview, device) -> Dict[str, paddle.Tensor]:
    """
    Builds paddle tensors from deserialized safetensors entries.

    Args:
        safeview:
            An iterable of `(name, info)` pairs where `info` exposes the
            `"dtype"`, `"shape"` and `"data"` entries of one tensor.
        device:
            Target device; tensors are moved with `.to(device)` unless it is `"cpu"`.

    Returns:
        `Dict[str, paddle.Tensor]`: dictionary that contains name as key, value as `paddle.Tensor`
    """
    # The host byte order cannot change while we iterate, so check it once.
    host_is_big_endian = sys.byteorder == "big"

    tensors = {}
    for name, info in safeview:
        dtype = _getdtype(info["dtype"])
        if len(info["data"]) != 0:
            tensor = paddle.base.core.frombuffer(info["data"], dtype).reshape(
                info["shape"]
            )
        else:
            # Workaround because frombuffer doesn't accept zero-size tensors
            assert any(x == 0 for x in info["shape"])
            tensor = paddle.empty(info["shape"], dtype=dtype)
        if device != "cpu":
            tensor = tensor.to(device)
        if host_is_big_endian:
            # Byteswap through numpy (out of place) and rebuild the tensor on
            # the requested device.
            tensor = paddle.to_tensor(
                tensor.numpy().byteswap(inplace=False), place=device
            )
        tensors[name] = tensor

    return tensors
234
+
235
+
236
def _tobytes(tensor: paddle.Tensor, name: str) -> bytes:
    """
    Copies the raw buffer of a contiguous paddle tensor into a `bytes` object.

    Args:
        tensor (`paddle.Tensor`):
            The tensor to dump. It is moved to cpu first when it lives elsewhere.
        name (`str`):
            The tensor's key, only used in the error message.

    Returns:
        `bytes`: the tensor's bytes (byteswapped on big-endian hosts), or
        `b""` when the tensor's data pointer is null.

    Raises:
        `ValueError`: if the tensor is not contiguous.
    """
    if not tensor.is_contiguous():
        raise ValueError(
            f"You are trying to save a non contiguous tensor: `{name}` which is not allowed. It either means you"
            " are trying to save tensors which are reference of each other in which case it's recommended to save"
            " only the full tensors, and reslice at load time, or simply call `.contiguous()` on your tensor to"
            " pack it before saving."
        )

    # Moving tensor to cpu before saving
    already_on_cpu = tensor.place.is_cpu_place()
    if not already_on_cpu:
        tensor = tensor.cpu()

    import ctypes

    import numpy as np

    # When shape is empty (scalar), np.prod returns a float
    # we need a int for the following calculations
    element_count = int(np.prod(tensor.shape).item())
    byte_count = element_count * _SIZE[tensor.dtype]

    address = tensor.data_ptr()
    if address == 0:
        return b""

    byte_pointer = ctypes.cast(address, ctypes.POINTER(ctypes.c_ubyte))
    raw = np.ctypeslib.as_array(byte_pointer, (byte_count,))  # no internal copy
    if sys.byteorder != "big":
        return raw.tobytes()

    # Not in place as that would potentially modify a live running model
    swapped = raw.view(NPDTYPES[tensor.dtype]).byteswap(inplace=False)
    return swapped.tobytes()
269
+
270
+
271
def _flatten(tensors: Dict[str, paddle.Tensor]) -> Dict[str, Dict[str, Any]]:
    """
    Validates a tensor dictionary and describes each tensor for serialization.

    Args:
        tensors (`Dict[str, paddle.Tensor]`):
            The tensors to describe, keyed by name.

    Returns:
        `Dict[str, Dict[str, Any]]`: for every key, a dictionary with the
        `"dtype"` (last dotted component of `str(tensor.dtype)`), the `"shape"`
        and the raw `"data"` bytes of the tensor.

    Raises:
        `ValueError`: if `tensors` is not a dict or one of its values is not a
        `paddle.Tensor`.
    """
    if not isinstance(tensors, dict):
        raise ValueError(
            f"Expected a dict of [str, paddle.Tensor] but received {type(tensors)}"
        )

    # First pass: reject any bad value before touching a single buffer.
    for key, value in tensors.items():
        if isinstance(value, paddle.Tensor):
            continue
        raise ValueError(
            f"Key `{key}` is invalid, expected paddle.Tensor but received {type(value)}"
        )

    # Second pass: build the per-tensor descriptions.
    flattened = {}
    for key, value in tensors.items():
        flattened[key] = {
            "dtype": str(value.dtype).split(".")[-1],
            "shape": value.shape,
            "data": _tobytes(value, key),
        }
    return flattened
safetensors/py.typed ADDED
File without changes
@@ -0,0 +1,139 @@
1
+ import os
2
+ from typing import Dict, Optional, Union
3
+
4
+ import numpy as np
5
+ import tensorflow as tf
6
+
7
+ from safetensors import numpy, safe_open
8
+
9
+
10
def save(
    tensors: Dict[str, tf.Tensor], metadata: Optional[Dict[str, str]] = None
) -> bytes:
    """
    Serializes a dictionary of tensorflow tensors to bytes in the safetensors format.

    Args:
        tensors (`Dict[str, tf.Tensor]`):
            The incoming tensors. Tensors need to be contiguous and dense.
        metadata (`Dict[str, str]`, *optional*, defaults to `None`):
            Optional text only metadata you might want to save in your header.
            For instance it can be useful to specify more about the underlying
            tensors. This is purely informative and does not affect tensor loading.

    Returns:
        `bytes`: The raw bytes representing the format

    Example:

    ```python
    from safetensors.tensorflow import save
    import tensorflow as tf

    tensors = {"embedding": tf.zeros((512, 1024)), "attention": tf.zeros((256, 256))}
    byte_data = save(tensors)
    ```
    """
    # The numpy backend does the actual serialization.
    return numpy.save(_tf2np(tensors), metadata=metadata)
39
+
40
+
41
def save_file(
    tensors: Dict[str, tf.Tensor],
    filename: Union[str, os.PathLike],
    metadata: Optional[Dict[str, str]] = None,
) -> None:
    """
    Saves a dictionary of tensors to a file in safetensors format.

    Args:
        tensors (`Dict[str, tf.Tensor]`):
            The incoming tensors. Tensors need to be contiguous and dense.
        filename (`str`, or `os.PathLike`):
            The filename we're saving into.
        metadata (`Dict[str, str]`, *optional*, defaults to `None`):
            Optional text only metadata you might want to save in your header.
            For instance it can be useful to specify more about the underlying
            tensors. This is purely informative and does not affect tensor loading.

    Returns:
        `None`

    Example:

    ```python
    from safetensors.tensorflow import save_file
    import tensorflow as tf

    tensors = {"embedding": tf.zeros((512, 1024)), "attention": tf.zeros((256, 256))}
    save_file(tensors, "model.safetensors")
    ```
    """
    # The numpy backend does the actual writing; its result is passed through.
    return numpy.save_file(_tf2np(tensors), filename, metadata=metadata)
74
+
75
+
76
def load(data: bytes) -> Dict[str, tf.Tensor]:
    """
    Loads the content of a safetensors file, given as bytes, into tensorflow tensors.

    Args:
        data (`bytes`):
            The content of a safetensors file

    Returns:
        `Dict[str, tf.Tensor]`: dictionary that contains name as key, value as `tf.Tensor` on cpu

    Example:

    ```python
    from safetensors.tensorflow import load

    file_path = "./my_folder/bert.safetensors"
    with open(file_path, "rb") as f:
        data = f.read()

    loaded = load(data)
    ```
    """
    # Decode with the numpy backend, then convert every array to a tf tensor.
    return _np2tf(numpy.load(data))
101
+
102
+
103
def load_file(filename: Union[str, os.PathLike]) -> Dict[str, tf.Tensor]:
    """
    Loads every tensor of a safetensors file into tensorflow format.

    Args:
        filename (`str`, or `os.PathLike`):
            The name of the file which contains the tensors

    Returns:
        `Dict[str, tf.Tensor]`: dictionary that contains name as key, value as `tf.Tensor`

    Example:

    ```python
    from safetensors.tensorflow import load_file

    file_path = "./my_folder/bert.safetensors"
    loaded = load_file(file_path)
    ```
    """
    with safe_open(filename, framework="tf") as handle:
        # Read the tensors in the order given by `offset_keys()`.
        tensors = {key: handle.get_tensor(key) for key in handle.offset_keys()}
    return tensors
128
+
129
+
130
def _np2tf(numpy_dict: Dict[str, np.ndarray]) -> Dict[str, tf.Tensor]:
    """
    Converts a dictionary of numpy arrays into tensorflow tensors.

    A new dictionary is returned; `numpy_dict` itself is left untouched so the
    caller's mapping is never rewritten behind its back.

    Args:
        numpy_dict (`Dict[str, np.ndarray]`):
            The arrays to convert, keyed by tensor name.

    Returns:
        `Dict[str, tf.Tensor]`: same keys, values converted with `tf.convert_to_tensor`
    """
    return {name: tf.convert_to_tensor(array) for name, array in numpy_dict.items()}
134
+
135
+
136
def _tf2np(tf_dict: Dict[str, tf.Tensor]) -> Dict[str, np.ndarray]:
    """
    Converts a dictionary of tensorflow tensors into numpy arrays.

    A new dictionary is returned. `tf_dict` is the user's own mapping when
    called from `save`/`save_file`, so it must not be modified: overwriting
    its entries would silently replace the caller's tensors with numpy arrays.

    Args:
        tf_dict (`Dict[str, tf.Tensor]`):
            The tensors to convert, keyed by tensor name.

    Returns:
        `Dict[str, np.ndarray]`: same keys, values converted with `.numpy()`
    """
    return {name: tensor.numpy() for name, tensor in tf_dict.items()}