safetensors 0.7.0__pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ # Re-export this
2
+ from ._safetensors_rust import ( # noqa: F401
3
+ SafetensorError,
4
+ __version__,
5
+ deserialize,
6
+ safe_open,
7
+ _safe_open_handle,
8
+ serialize,
9
+ serialize_file,
10
+ )
@@ -0,0 +1,164 @@
1
+ # Generated content DO NOT EDIT
2
@staticmethod
def deserialize(bytes):
    """
    Deserializes the byte content of a safetensors file.

    Args:
        bytes (`bytes`):
            The byte content of a file

    Returns:
        (`List[Tuple[str, Dict[str, Any]]]`):
            The deserialized content is like:
                [("tensor_name", {"shape": [2, 3], "dtype": "F32", "data": b"\0\0.." }), (...)]
    """
    pass
17
+
18
@staticmethod
def serialize(tensor_dict, metadata=None):
    """
    Serializes raw data.

    Args:
        tensor_dict (`Dict[str, Dict[str, Any]]`):
            The tensor dict is like:
                {"tensor_name": {"dtype": "F32", "shape": [2, 3], "data": b"\0\0"}}
        metadata (`Dict[str, str]`, *optional*):
            The optional purely text annotations

    Returns:
        (`bytes`):
            The serialized content.
    """
    pass
35
+
36
@staticmethod
def serialize_file(tensor_dict, filename, metadata=None):
    """
    Serializes raw data into file.

    Args:
        tensor_dict (`Dict[str, Dict[str, Any]]`):
            The tensor dict is like:
                {"tensor_name": {"dtype": "F32", "shape": [2, 3], "data": b"\0\0"}}
        filename (`str`, or `os.PathLike`):
            The name of the file to write into.
        metadata (`Dict[str, str]`, *optional*):
            The optional purely text annotations

    Returns:
        (`NoneType`):
            On success return None
    """
    pass
55
+
56
class safe_open:
    """
    Opens a safetensors file lazily and returns tensors as asked

    Args:
        filename (`str`, or `os.PathLike`):
            The filename to open

        framework (`str`):
            The framework you want your tensors in. Supported values:
            `pt`, `tf`, `flax`, `numpy`.
            NOTE(review): the Python wrappers in this package also pass
            `"np"` and `"mlx"` here - confirm the full list of accepted
            values against the native implementation.

        device (`str`, defaults to `"cpu"`):
            The device on which you want the tensors.
    """
    def __init__(self, filename, framework, device=...):
        pass

    def __enter__(self):
        """
        Start the context manager
        """
        pass

    def __exit__(self, _exc_type, _exc_value, _traceback):
        """
        Exits the context manager
        """
        pass

    def get_slice(self, name):
        """
        Returns a full slice view object

        Args:
            name (`str`):
                The name of the tensor you want

        Returns:
            (`PySafeSlice`):
                A dummy object you can slice into to get a real tensor

        Example:
        ```python
        from safetensors import safe_open

        with safe_open("model.safetensors", framework="pt", device=0) as f:
            tensor_part = f.get_slice("embedding")[:, ::8]

        ```
        """
        pass

    def get_tensor(self, name):
        """
        Returns a full tensor

        Args:
            name (`str`):
                The name of the tensor you want

        Returns:
            (`Tensor`):
                The tensor in the framework you opened the file for.

        Example:
        ```python
        from safetensors import safe_open

        with safe_open("model.safetensors", framework="pt", device=0) as f:
            tensor = f.get_tensor("embedding")

        ```
        """
        pass

    def keys(self):
        """
        Returns the names of the tensors in the file.

        Returns:
            (`List[str]`):
                The names of the tensors contained in that file
        """
        pass

    def metadata(self):
        """
        Return the special non tensor information in the header

        Returns:
            (`Dict[str, str]`):
                The freeform metadata.
        """
        pass

    def offset_keys(self):
        """
        Returns the names of the tensors in the file, ordered by offset.

        Returns:
            (`List[str]`):
                The names of the tensors contained in that file
        """
        pass
160
+
161
class SafetensorError(Exception):
    """
    Custom Python exception type for safetensors errors.

    It derives directly from `Exception`, so an `except Exception` handler
    also catches it.
    """
safetensors/flax.py ADDED
@@ -0,0 +1,138 @@
1
+ import os
2
+ from typing import Dict, Optional, Union
3
+
4
+ import numpy as np
5
+
6
+ import jax.numpy as jnp
7
+ from jax import Array
8
+ from safetensors import numpy, safe_open
9
+
10
+
11
def save(tensors: Dict[str, Array], metadata: Optional[Dict[str, str]] = None) -> bytes:
    """
    Serializes a dictionary of jax arrays to bytes in safetensors format.

    The arrays are converted to numpy arrays first and then handed to the
    numpy backend of this package.

    Args:
        tensors (`Dict[str, Array]`):
            The incoming tensors. Tensors need to be contiguous and dense.
        metadata (`Dict[str, str]`, *optional*, defaults to `None`):
            Optional text only metadata you might want to save in your header.
            For instance it can be useful to specify more about the underlying
            tensors. This is purely informative and does not affect tensor loading.

    Returns:
        `bytes`: The raw bytes representing the format

    Example:

    ```python
    from safetensors.flax import save
    from jax import numpy as jnp

    tensors = {"embedding": jnp.zeros((512, 1024)), "attention": jnp.zeros((256, 256))}
    byte_data = save(tensors)
    ```
    """
    return numpy.save(_jnp2np(tensors), metadata=metadata)
38
+
39
+
40
def save_file(
    tensors: Dict[str, Array],
    filename: Union[str, os.PathLike],
    metadata: Optional[Dict[str, str]] = None,
) -> None:
    """
    Saves a dictionary of jax arrays to a file in safetensors format.

    The arrays are converted to numpy arrays first and then written through
    the numpy backend of this package.

    Args:
        tensors (`Dict[str, Array]`):
            The incoming tensors. Tensors need to be contiguous and dense.
        filename (`str`, or `os.PathLike`):
            The filename we're saving into.
        metadata (`Dict[str, str]`, *optional*, defaults to `None`):
            Optional text only metadata you might want to save in your header.
            For instance it can be useful to specify more about the underlying
            tensors. This is purely informative and does not affect tensor loading.

    Returns:
        `None`

    Example:

    ```python
    from safetensors.flax import save_file
    from jax import numpy as jnp

    tensors = {"embedding": jnp.zeros((512, 1024)), "attention": jnp.zeros((256, 256))}
    save_file(tensors, "model.safetensors")
    ```
    """
    return numpy.save_file(_jnp2np(tensors), filename, metadata=metadata)
73
+
74
+
75
def load(data: bytes) -> Dict[str, Array]:
    """
    Loads safetensors content held in memory into jax arrays.

    The bytes are decoded by the numpy backend of this package and each
    resulting array is then converted with `jnp.array`.

    Args:
        data (`bytes`):
            The content of a safetensors file

    Returns:
        `Dict[str, Array]`: dictionary that contains name as key, value as `Array` on cpu

    Example:

    ```python
    from safetensors.flax import load

    file_path = "./my_folder/bert.safetensors"
    with open(file_path, "rb") as f:
        data = f.read()

    loaded = load(data)
    ```
    """
    return _np2jnp(numpy.load(data))
100
+
101
+
102
def load_file(filename: Union[str, os.PathLike]) -> Dict[str, Array]:
    """
    Loads every tensor of a safetensors file as a flax/jax array.

    Tensors are read in the order given by `offset_keys()`, so the returned
    dictionary is ordered by the tensors' offsets in the file.

    Args:
        filename (`str`, or `os.PathLike`):
            The name of the file which contains the tensors

    Returns:
        `Dict[str, Array]`: dictionary that contains name as key, value as `Array`

    Example:

    ```python
    from safetensors.flax import load_file

    file_path = "./my_folder/bert.safetensors"
    loaded = load_file(file_path)
    ```
    """
    with safe_open(filename, framework="flax") as handle:
        return {name: handle.get_tensor(name) for name in handle.offset_keys()}
127
+
128
+
129
def _np2jnp(numpy_dict: Dict[str, np.ndarray]) -> Dict[str, Array]:
    """
    Converts a dictionary of numpy arrays into a dictionary of jax arrays.

    A new dictionary is returned; `numpy_dict` itself is left untouched so a
    caller holding on to it never sees its values change type.

    Args:
        numpy_dict (`Dict[str, np.ndarray]`):
            Mapping of tensor name to numpy array.

    Returns:
        `Dict[str, Array]`: same keys, in the same order, with every value
        passed through `jnp.array`.
    """
    return {name: jnp.array(value) for name, value in numpy_dict.items()}
133
+
134
+
135
def _jnp2np(jnp_dict: Dict[str, Array]) -> Dict[str, np.ndarray]:
    """
    Converts a dictionary of jax arrays into a dictionary of numpy arrays.

    A new dictionary is returned. The input is the user's own `tensors`
    mapping passed to `save` / `save_file`, so it must not be rewritten in
    place: doing so would silently replace the caller's jax arrays with
    numpy arrays.

    Args:
        jnp_dict (`Dict[str, Array]`):
            Mapping of tensor name to jax array.

    Returns:
        `Dict[str, np.ndarray]`: same keys, in the same order, with every
        value passed through `np.asarray`.
    """
    return {name: np.asarray(value) for name, value in jnp_dict.items()}
safetensors/mlx.py ADDED
@@ -0,0 +1,140 @@
1
+ import os
2
+ from typing import Dict, Optional, Union
3
+
4
+ import numpy as np
5
+
6
+ import mlx.core as mx
7
+ from safetensors import numpy, safe_open
8
+
9
+
10
def save(
    tensors: Dict[str, mx.array], metadata: Optional[Dict[str, str]] = None
) -> bytes:
    """
    Serializes a dictionary of MLX arrays to bytes in safetensors format.

    The arrays are converted to numpy arrays first and then handed to the
    numpy backend of this package.

    Args:
        tensors (`Dict[str, mx.array]`):
            The incoming tensors. Tensors need to be contiguous and dense.
        metadata (`Dict[str, str]`, *optional*, defaults to `None`):
            Optional text only metadata you might want to save in your header.
            For instance it can be useful to specify more about the underlying
            tensors. This is purely informative and does not affect tensor loading.

    Returns:
        `bytes`: The raw bytes representing the format

    Example:

    ```python
    from safetensors.mlx import save
    import mlx.core as mx

    tensors = {"embedding": mx.zeros((512, 1024)), "attention": mx.zeros((256, 256))}
    byte_data = save(tensors)
    ```
    """
    return numpy.save(_mx2np(tensors), metadata=metadata)
39
+
40
+
41
def save_file(
    tensors: Dict[str, mx.array],
    filename: Union[str, os.PathLike],
    metadata: Optional[Dict[str, str]] = None,
) -> None:
    """
    Saves a dictionary of MLX arrays to a file in safetensors format.

    The arrays are converted to numpy arrays first and then written through
    the numpy backend of this package.

    Args:
        tensors (`Dict[str, mx.array]`):
            The incoming tensors. Tensors need to be contiguous and dense.
        filename (`str`, or `os.PathLike`):
            The filename we're saving into.
        metadata (`Dict[str, str]`, *optional*, defaults to `None`):
            Optional text only metadata you might want to save in your header.
            For instance it can be useful to specify more about the underlying
            tensors. This is purely informative and does not affect tensor loading.

    Returns:
        `None`

    Example:

    ```python
    from safetensors.mlx import save_file
    import mlx.core as mx

    tensors = {"embedding": mx.zeros((512, 1024)), "attention": mx.zeros((256, 256))}
    save_file(tensors, "model.safetensors")
    ```
    """
    return numpy.save_file(_mx2np(tensors), filename, metadata=metadata)
74
+
75
+
76
def load(data: bytes) -> Dict[str, mx.array]:
    """
    Loads safetensors content held in memory into MLX arrays.

    The bytes are decoded by the numpy backend of this package and each
    resulting array is then converted with `mx.array`.

    Args:
        data (`bytes`):
            The content of a safetensors file

    Returns:
        `Dict[str, mx.array]`: dictionary that contains name as key, value as `mx.array`

    Example:

    ```python
    from safetensors.mlx import load

    file_path = "./my_folder/bert.safetensors"
    with open(file_path, "rb") as f:
        data = f.read()

    loaded = load(data)
    ```
    """
    return _np2mx(numpy.load(data))
101
+
102
+
103
def load_file(filename: Union[str, os.PathLike]) -> Dict[str, mx.array]:
    """
    Loads every tensor of a safetensors file as an MLX array.

    Tensors are read in the order given by `offset_keys()`, so the returned
    dictionary is ordered by the tensors' offsets in the file.

    Args:
        filename (`str`, or `os.PathLike`):
            The name of the file which contains the tensors

    Returns:
        `Dict[str, mx.array]`: dictionary that contains name as key, value as `mx.array`

    Example:

    ```python
    from safetensors.mlx import load_file

    file_path = "./my_folder/bert.safetensors"
    loaded = load_file(file_path)
    ```
    """
    with safe_open(filename, framework="mlx") as handle:
        return {name: handle.get_tensor(name) for name in handle.offset_keys()}
128
+
129
+
130
def _np2mx(numpy_dict: Dict[str, np.ndarray]) -> Dict[str, mx.array]:
    """
    Converts a dictionary of numpy arrays into a dictionary of MLX arrays.

    A new dictionary is returned and `numpy_dict` is left untouched, which
    matches how `_mx2np` handles the opposite direction.

    Args:
        numpy_dict (`Dict[str, np.ndarray]`):
            Mapping of tensor name to numpy array.

    Returns:
        `Dict[str, mx.array]`: same keys, in the same order, with every value
        passed through `mx.array`.
    """
    return {name: mx.array(value) for name, value in numpy_dict.items()}
134
+
135
+
136
def _mx2np(mx_dict: Dict[str, mx.array]) -> Dict[str, np.array]:
    """
    Converts a dictionary of MLX arrays into a dictionary of numpy arrays.

    Builds and returns a new dictionary; `mx_dict` is not modified.

    Args:
        mx_dict (`Dict[str, mx.array]`):
            Mapping of tensor name to MLX array.

    Returns:
        Dictionary with the same keys whose values went through `np.asarray`.
    """
    return {name: np.asarray(value) for name, value in mx_dict.items()}
safetensors/numpy.py ADDED
@@ -0,0 +1,187 @@
1
+ import os
2
+ import sys
3
+ from typing import Dict, Optional, Union
4
+
5
+ import numpy as np
6
+
7
+ from safetensors import deserialize, safe_open, serialize, serialize_file
8
+
9
+
10
def _tobytes(tensor: np.ndarray) -> bytes:
    """
    Returns the raw bytes of `tensor` in little-endian byte order.

    Arrays that `_is_little_endian` reports as not little-endian are
    byte-swapped into a copy first; the input array is never modified.
    """
    if _is_little_endian(tensor):
        return tensor.tobytes()
    # inplace=False: swap on a copy so the caller's array keeps its contents.
    return tensor.byteswap(inplace=False).tobytes()
14
+
15
+
16
def save(
    tensor_dict: Dict[str, np.ndarray], metadata: Optional[Dict[str, str]] = None
) -> bytes:
    """
    Serializes a dictionary of numpy arrays to bytes in safetensors format.

    Each array is described by its dtype name, its shape and its
    little-endian raw bytes before being passed to `serialize`.

    Args:
        tensor_dict (`Dict[str, np.ndarray]`):
            The incoming tensors. Tensors need to be contiguous and dense.
        metadata (`Dict[str, str]`, *optional*, defaults to `None`):
            Optional text only metadata you might want to save in your header.
            For instance it can be useful to specify more about the underlying
            tensors. This is purely informative and does not affect tensor loading.

    Returns:
        `bytes`: The raw bytes representing the format

    Example:

    ```python
    from safetensors.numpy import save
    import numpy as np

    tensors = {"embedding": np.zeros((512, 1024)), "attention": np.zeros((256, 256))}
    byte_data = save(tensors)
    ```
    """
    flattened = {}
    for name, array in tensor_dict.items():
        flattened[name] = {
            "dtype": array.dtype.name,
            "shape": array.shape,
            "data": _tobytes(array),
        }
    # Wrap in bytes() so callers always receive a plain `bytes` object.
    return bytes(serialize(flattened, metadata=metadata))
50
+
51
+
52
def save_file(
    tensor_dict: Dict[str, np.ndarray],
    filename: Union[str, os.PathLike],
    metadata: Optional[Dict[str, str]] = None,
) -> None:
    """
    Saves a dictionary of numpy arrays to a file in safetensors format.

    Each array is described by its dtype name, its shape and its
    little-endian raw bytes before being passed to `serialize_file`.

    Args:
        tensor_dict (`Dict[str, np.ndarray]`):
            The incoming tensors. Tensors need to be contiguous and dense.
        filename (`str`, or `os.PathLike`):
            The filename we're saving into.
        metadata (`Dict[str, str]`, *optional*, defaults to `None`):
            Optional text only metadata you might want to save in your header.
            For instance it can be useful to specify more about the underlying
            tensors. This is purely informative and does not affect tensor loading.

    Returns:
        `None`

    Example:

    ```python
    from safetensors.numpy import save_file
    import numpy as np

    tensors = {"embedding": np.zeros((512, 1024)), "attention": np.zeros((256, 256))}
    save_file(tensors, "model.safetensors")
    ```
    """
    flattened = {}
    for name, array in tensor_dict.items():
        flattened[name] = {
            "dtype": array.dtype.name,
            "shape": array.shape,
            "data": _tobytes(array),
        }
    serialize_file(flattened, filename, metadata=metadata)
88
+
89
+
90
def load(data: bytes) -> Dict[str, np.ndarray]:
    """
    Loads safetensors content held in memory into numpy arrays.

    The bytes are parsed with `deserialize` and every entry is turned into a
    numpy array by `_view2np`.

    Args:
        data (`bytes`):
            The content of a safetensors file

    Returns:
        `Dict[str, np.ndarray]`: dictionary that contains name as key, value as `np.ndarray` on cpu

    Example:

    ```python
    from safetensors.numpy import load

    file_path = "./my_folder/bert.safetensors"
    with open(file_path, "rb") as f:
        data = f.read()

    loaded = load(data)
    ```
    """
    return _view2np(deserialize(data))
115
+
116
+
117
def load_file(filename: Union[str, os.PathLike]) -> Dict[str, np.ndarray]:
    """
    Loads every tensor of a safetensors file as a numpy array.

    Tensors are read in the order given by `offset_keys()`, so the returned
    dictionary is ordered by the tensors' offsets in the file.

    Args:
        filename (`str`, or `os.PathLike`):
            The name of the file which contains the tensors

    Returns:
        `Dict[str, np.ndarray]`: dictionary that contains name as key, value as `np.ndarray`

    Example:

    ```python
    from safetensors.numpy import load_file

    file_path = "./my_folder/bert.safetensors"
    loaded = load_file(file_path)
    ```
    """
    with safe_open(filename, framework="np") as handle:
        return {name: handle.get_tensor(name) for name in handle.offset_keys()}
142
+
143
+
144
# Lookup table from the dtype strings used in deserialized safetensors entries
# (the "dtype" field, e.g. "F32") to the numpy scalar type used to interpret
# the raw buffer. A dtype string without an entry here makes `_getdtype`
# raise `KeyError`.
_TYPES = {
    "F64": np.float64,
    "F32": np.float32,
    "F16": np.float16,
    "I64": np.int64,
    "U64": np.uint64,
    "I32": np.int32,
    "U32": np.uint32,
    "I16": np.int16,
    "U16": np.uint16,
    "I8": np.int8,
    "U8": np.uint8,
    "BOOL": bool,
    "C64": np.complex64,
}
159
+
160
+
161
def _getdtype(dtype_str: str) -> np.dtype:
    """
    Looks up the numpy type registered in `_TYPES` for a safetensors dtype string.

    Raises:
        KeyError: if `dtype_str` has no entry in `_TYPES`.
    """
    numpy_type = _TYPES[dtype_str]
    return numpy_type
163
+
164
+
165
def _view2np(safeview) -> Dict[str, np.ndarray]:
    """
    Converts deserialized safetensors entries into numpy arrays.

    Args:
        safeview:
            Iterable of `(name, info)` pairs where `info` provides the keys
            `"dtype"` (a key of `_TYPES`), `"shape"` and `"data"` (the raw
            buffer of the tensor).

    Returns:
        `Dict[str, np.ndarray]`: dictionary mapping each name to an array
        built with `np.frombuffer` over `info["data"]` and reshaped to
        `info["shape"]`.

    Raises:
        KeyError: if an entry uses a dtype string missing from `_TYPES`.
    """
    result = {}
    for name, info in safeview:
        # `_tobytes` always writes little-endian data, so the buffer must be
        # read as little-endian regardless of the host. On little-endian
        # machines this is exactly the native dtype; on big-endian machines
        # it prevents the values from being misinterpreted.
        dtype = np.dtype(_getdtype(info["dtype"])).newbyteorder("<")
        result[name] = np.frombuffer(info["data"], dtype=dtype).reshape(info["shape"])
    return result
172
+
173
+
174
def _is_little_endian(tensor: np.ndarray) -> bool:
    """
    Tells whether the bytes of `tensor` are already in little-endian order.

    The decision is based on `tensor.dtype.byteorder`:
    `"<"` and `"|"` count as little-endian, `">"` does not, and `"="`
    (native) depends on `sys.byteorder` of the running interpreter.

    Raises:
        ValueError: if the dtype reports any other byte order code.
    """
    order = tensor.dtype.byteorder
    if order in ("|", "<"):
        return True
    if order == ">":
        return False
    if order == "=":
        # Native order: little-endian only when the host itself is.
        return sys.byteorder == "little"
    raise ValueError(f"Unexpected byte order {order}")