safetensors 0.7.0__pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- safetensors/__init__.py +10 -0
- safetensors/__init__.pyi +164 -0
- safetensors/_safetensors_rust.pypy310-pp73-aarch64-linux-gnu.so +0 -0
- safetensors/flax.py +138 -0
- safetensors/mlx.py +140 -0
- safetensors/numpy.py +187 -0
- safetensors/paddle.py +290 -0
- safetensors/py.typed +0 -0
- safetensors/tensorflow.py +139 -0
- safetensors/torch.py +550 -0
- safetensors-0.7.0.dist-info/METADATA +133 -0
- safetensors-0.7.0.dist-info/RECORD +14 -0
- safetensors-0.7.0.dist-info/WHEEL +5 -0
- safetensors-0.7.0.dist-info/licenses/LICENSE +201 -0
safetensors/__init__.py
ADDED
safetensors/__init__.pyi
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
# Generated content DO NOT EDIT
|
|
2
|
+
@staticmethod
def deserialize(bytes):
    """
    Deserializes the raw byte content of a safetensors file.

    Args:
        bytes (`bytes`):
            The byte content of a file

    Returns:
        (`List[Tuple[str, Dict[str, Any]]]`):
            The deserialized content is like:
                [("tensor_name", {"shape": [2, 3], "dtype": "F32", "data": b"\0\0.." }), (...)]
    """
    pass
|
|
17
|
+
|
|
18
|
+
@staticmethod
def serialize(tensor_dict, metadata=None):
    """
    Serializes raw data.

    Args:
        tensor_dict (`Dict[str, Dict[str, Any]]`):
            The tensor dict is like:
                {"tensor_name": {"dtype": "F32", "shape": [2, 3], "data": b"\0\0"}}
        metadata (`Dict[str, str]`, *optional*):
            The optional purely text annotations

    Returns:
        (`bytes`):
            The serialized content.
    """
    pass
|
|
35
|
+
|
|
36
|
+
@staticmethod
def serialize_file(tensor_dict, filename, metadata=None):
    """
    Serializes raw data into a file.

    Args:
        tensor_dict (`Dict[str, Dict[str, Any]]`):
            The tensor dict is like:
                {"tensor_name": {"dtype": "F32", "shape": [2, 3], "data": b"\0\0"}}
        filename (`str`, or `os.PathLike`):
            The name of the file to write into.
        metadata (`Dict[str, str]`, *optional*):
            The optional purely text annotations

    Returns:
        (`NoneType`):
            On success return None
    """
    pass
|
|
55
|
+
|
|
56
|
+
class safe_open:
    """
    Opens a safetensors file lazily and returns tensors as asked.

    Args:
        filename (`str`, or `os.PathLike`):
            The filename to open

        framework (`str`):
            The framework you want your tensors in. Supported values:
            `pt`, `tf`, `flax`, `numpy`.
            NOTE(review): the loaders shipped in this package also pass
            `"np"` and `"mlx"` here; confirm the full list of accepted values.

        device (`str`, defaults to `"cpu"`):
            The device on which you want the tensors.
    """
    def __init__(self, filename, framework, device=...):
        pass

    def __enter__(self):
        """
        Start the context manager
        """
        pass

    def __exit__(self, _exc_type, _exc_value, _traceback):
        """
        Exits the context manager
        """
        pass

    def get_slice(self, name):
        """
        Returns a full slice view object

        Args:
            name (`str`):
                The name of the tensor you want

        Returns:
            (`PySafeSlice`):
                A dummy object you can slice into to get a real tensor

        Example:
        ```python
        from safetensors import safe_open

        with safe_open("model.safetensors", framework="pt", device=0) as f:
            tensor_part = f.get_slice("embedding")[:, ::8]
        ```
        """
        pass

    def get_tensor(self, name):
        """
        Returns a full tensor

        Args:
            name (`str`):
                The name of the tensor you want

        Returns:
            (`Tensor`):
                The tensor in the framework you opened the file for.

        Example:
        ```python
        from safetensors import safe_open

        with safe_open("model.safetensors", framework="pt", device=0) as f:
            tensor = f.get_tensor("embedding")
        ```
        """
        pass

    def keys(self):
        """
        Returns the names of the tensors in the file.

        Returns:
            (`List[str]`):
                The names of the tensors contained in that file
        """
        pass

    def metadata(self):
        """
        Return the special non tensor information in the header

        Returns:
            (`Dict[str, str]`):
                The freeform metadata.
        """
        pass

    def offset_keys(self):
        """
        Returns the names of the tensors in the file, ordered by offset.

        Returns:
            (`List[str]`):
                The names of the tensors contained in that file
        """
        pass
|
|
160
|
+
|
|
161
|
+
class SafetensorError(Exception):
    """
    Custom Python exception raised for safetensors errors.
    """
|
|
Binary file
|
safetensors/flax.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import Dict, Optional, Union
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
import jax.numpy as jnp
|
|
7
|
+
from jax import Array
|
|
8
|
+
from safetensors import numpy, safe_open
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def save(tensors: Dict[str, Array], metadata: Optional[Dict[str, str]] = None) -> bytes:
    """
    Serialize a dictionary of JAX arrays to bytes in the safetensors format.

    The arrays are converted to numpy arrays and then handed to
    `safetensors.numpy.save`.

    Args:
        tensors (`Dict[str, Array]`):
            The incoming tensors. Tensors need to be contiguous and dense.
        metadata (`Dict[str, str]`, *optional*, defaults to `None`):
            Optional text only metadata you might want to save in your header.
            For instance it can be useful to specify more about the underlying
            tensors. This is purely informative and does not affect tensor loading.

    Returns:
        `bytes`: The raw bytes representing the format

    Example:

    ```python
    from safetensors.flax import save
    from jax import numpy as jnp

    tensors = {"embedding": jnp.zeros((512, 1024)), "attention": jnp.zeros((256, 256))}
    byte_data = save(tensors)
    ```
    """
    return numpy.save(_jnp2np(tensors), metadata=metadata)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def save_file(
    tensors: Dict[str, Array],
    filename: Union[str, os.PathLike],
    metadata: Optional[Dict[str, str]] = None,
) -> None:
    """
    Write a dictionary of JAX arrays to a file in the safetensors format.

    The arrays are converted to numpy arrays and then handed to
    `safetensors.numpy.save_file`.

    Args:
        tensors (`Dict[str, Array]`):
            The incoming tensors. Tensors need to be contiguous and dense.
        filename (`str`, or `os.PathLike`):
            The filename we're saving into.
        metadata (`Dict[str, str]`, *optional*, defaults to `None`):
            Optional text only metadata you might want to save in your header.
            For instance it can be useful to specify more about the underlying
            tensors. This is purely informative and does not affect tensor loading.

    Returns:
        `None`

    Example:

    ```python
    from safetensors.flax import save_file
    from jax import numpy as jnp

    tensors = {"embedding": jnp.zeros((512, 1024)), "attention": jnp.zeros((256, 256))}
    save_file(tensors, "model.safetensors")
    ```
    """
    converted = _jnp2np(tensors)
    # Forward whatever the numpy writer returns, as the original did.
    return numpy.save_file(converted, filename, metadata=metadata)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def load(data: bytes) -> Dict[str, Array]:
    """
    Decode the bytes of a safetensors file into JAX arrays.

    The content is first decoded with `safetensors.numpy.load`, then every
    value is converted with `jnp.array`.

    Args:
        data (`bytes`):
            The content of a safetensors file

    Returns:
        `Dict[str, Array]`: dictionary that contains name as key, value as `Array`

    Example:

    ```python
    from safetensors.flax import load

    file_path = "./my_folder/bert.safetensors"
    with open(file_path, "rb") as f:
        data = f.read()

    loaded = load(data)
    ```
    """
    return _np2jnp(numpy.load(data))
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def load_file(filename: Union[str, os.PathLike]) -> Dict[str, Array]:
    """
    Read every tensor of a safetensors file into a dictionary of JAX arrays.

    The file is opened with `safe_open(..., framework="flax")` and tensors are
    fetched in the order given by `offset_keys()`.

    Args:
        filename (`str`, or `os.PathLike`):
            The name of the file which contains the tensors

    Returns:
        `Dict[str, Array]`: dictionary that contains name as key, value as `Array`

    Example:

    ```python
    from safetensors.flax import load_file

    file_path = "./my_folder/bert.safetensors"
    loaded = load_file(file_path)
    ```
    """
    with safe_open(filename, framework="flax") as handle:
        return {name: handle.get_tensor(name) for name in handle.offset_keys()}
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _np2jnp(numpy_dict: Dict[str, np.ndarray]) -> Dict[str, Array]:
    """
    Replace every value of `numpy_dict` with `jnp.array(value)`.

    The dictionary is updated in place and the same object is returned.
    """
    # Snapshot the keys first; only existing keys are reassigned.
    for name in list(numpy_dict):
        numpy_dict[name] = jnp.array(numpy_dict[name])
    return numpy_dict
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _jnp2np(jnp_dict: Dict[str, Array]) -> Dict[str, np.ndarray]:
    """
    Convert a dictionary of JAX arrays into a dictionary of numpy arrays.

    A new dictionary is returned and `jnp_dict` is left untouched, so the
    public `save` / `save_file` entry points no longer replace the arrays in
    the caller's dictionary with numpy arrays as a side effect.

    Args:
        jnp_dict (`Dict[str, Array]`):
            Mapping from tensor name to array.

    Returns:
        `Dict[str, np.ndarray]`: a new mapping with the same keys, in the same
        order, whose values are `np.asarray(value)`.
    """
    return {name: np.asarray(value) for name, value in jnp_dict.items()}
|
safetensors/mlx.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import Dict, Optional, Union
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
import mlx.core as mx
|
|
7
|
+
from safetensors import numpy, safe_open
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def save(
    tensors: Dict[str, mx.array], metadata: Optional[Dict[str, str]] = None
) -> bytes:
    """
    Serialize a dictionary of MLX arrays to bytes in the safetensors format.

    The arrays are converted to numpy arrays and then handed to
    `safetensors.numpy.save`.

    Args:
        tensors (`Dict[str, mx.array]`):
            The incoming tensors. Tensors need to be contiguous and dense.
        metadata (`Dict[str, str]`, *optional*, defaults to `None`):
            Optional text only metadata you might want to save in your header.
            For instance it can be useful to specify more about the underlying
            tensors. This is purely informative and does not affect tensor loading.

    Returns:
        `bytes`: The raw bytes representing the format

    Example:

    ```python
    from safetensors.mlx import save
    import mlx.core as mx

    tensors = {"embedding": mx.zeros((512, 1024)), "attention": mx.zeros((256, 256))}
    byte_data = save(tensors)
    ```
    """
    return numpy.save(_mx2np(tensors), metadata=metadata)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def save_file(
    tensors: Dict[str, mx.array],
    filename: Union[str, os.PathLike],
    metadata: Optional[Dict[str, str]] = None,
) -> None:
    """
    Write a dictionary of MLX arrays to a file in the safetensors format.

    The arrays are converted to numpy arrays and then handed to
    `safetensors.numpy.save_file`.

    Args:
        tensors (`Dict[str, mx.array]`):
            The incoming tensors. Tensors need to be contiguous and dense.
        filename (`str`, or `os.PathLike`):
            The filename we're saving into.
        metadata (`Dict[str, str]`, *optional*, defaults to `None`):
            Optional text only metadata you might want to save in your header.
            For instance it can be useful to specify more about the underlying
            tensors. This is purely informative and does not affect tensor loading.

    Returns:
        `None`

    Example:

    ```python
    from safetensors.mlx import save_file
    import mlx.core as mx

    tensors = {"embedding": mx.zeros((512, 1024)), "attention": mx.zeros((256, 256))}
    save_file(tensors, "model.safetensors")
    ```
    """
    converted = _mx2np(tensors)
    # Forward whatever the numpy writer returns, as the original did.
    return numpy.save_file(converted, filename, metadata=metadata)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def load(data: bytes) -> Dict[str, mx.array]:
    """
    Decode the bytes of a safetensors file into MLX arrays.

    The content is first decoded with `safetensors.numpy.load`, then every
    value is converted with `mx.array`.

    Args:
        data (`bytes`):
            The content of a safetensors file

    Returns:
        `Dict[str, mx.array]`: dictionary that contains name as key, value as `mx.array`

    Example:

    ```python
    from safetensors.mlx import load

    file_path = "./my_folder/bert.safetensors"
    with open(file_path, "rb") as f:
        data = f.read()

    loaded = load(data)
    ```
    """
    return _np2mx(numpy.load(data))
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def load_file(filename: Union[str, os.PathLike]) -> Dict[str, mx.array]:
    """
    Loads a safetensors file into MLX format.

    The file is opened with `safe_open(..., framework="mlx")` and tensors are
    fetched in the order given by `offset_keys()`.

    Args:
        filename (`str`, or `os.PathLike`):
            The name of the file which contains the tensors

    Returns:
        `Dict[str, mx.array]`: dictionary that contains name as key, value as `mx.array`

    Example:

    ```python
    from safetensors.mlx import load_file

    file_path = "./my_folder/bert.safetensors"
    loaded = load_file(file_path)
    ```
    """
    with safe_open(filename, framework="mlx") as f:
        return {k: f.get_tensor(k) for k in f.offset_keys()}
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _np2mx(numpy_dict: Dict[str, np.ndarray]) -> Dict[str, mx.array]:
    """
    Replace every value of `numpy_dict` with `mx.array(value)`.

    The dictionary is updated in place and the same object is returned.
    """
    # Snapshot the keys first; only existing keys are reassigned.
    for name in list(numpy_dict):
        numpy_dict[name] = mx.array(numpy_dict[name])
    return numpy_dict
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _mx2np(mx_dict: Dict[str, mx.array]) -> Dict[str, np.ndarray]:
    """
    Convert a dictionary of MLX arrays into a dictionary of numpy arrays.

    A new dictionary is built; `mx_dict` itself is not modified.

    Args:
        mx_dict (`Dict[str, mx.array]`):
            Mapping from tensor name to array.

    Returns:
        `Dict[str, np.ndarray]`: a new mapping with the same keys, in the same
        order, whose values are `np.asarray(value)`.
    """
    return {name: np.asarray(value) for name, value in mx_dict.items()}
|
safetensors/numpy.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
from typing import Dict, Optional, Union
|
|
4
|
+
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
from safetensors import deserialize, safe_open, serialize, serialize_file
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _tobytes(tensor: np.ndarray) -> bytes:
    """
    Return the raw bytes of `tensor`, byte-swapping first when
    `_is_little_endian` reports that it is not little-endian.

    The swap is done on a copy (`inplace=False`), so the caller's array is
    not modified.
    """
    if _is_little_endian(tensor):
        return tensor.tobytes()
    swapped = tensor.byteswap(inplace=False)
    return swapped.tobytes()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def save(
    tensor_dict: Dict[str, np.ndarray], metadata: Optional[Dict[str, str]] = None
) -> bytes:
    """
    Serialize a dictionary of numpy arrays to bytes in the safetensors format.

    Args:
        tensor_dict (`Dict[str, np.ndarray]`):
            The incoming tensors. Tensors need to be contiguous and dense.
        metadata (`Dict[str, str]`, *optional*, defaults to `None`):
            Optional text only metadata you might want to save in your header.
            For instance it can be useful to specify more about the underlying
            tensors. This is purely informative and does not affect tensor loading.

    Returns:
        `bytes`: The raw bytes representing the format

    Example:

    ```python
    from safetensors.numpy import save
    import numpy as np

    tensors = {"embedding": np.zeros((512, 1024)), "attention": np.zeros((256, 256))}
    byte_data = save(tensors)
    ```
    """
    # Describe each array as the dtype/shape/data record handed to `serialize`.
    records = {}
    for name, array in tensor_dict.items():
        records[name] = {
            "dtype": array.dtype.name,
            "shape": array.shape,
            "data": _tobytes(array),
        }
    return bytes(serialize(records, metadata=metadata))
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def save_file(
    tensor_dict: Dict[str, np.ndarray],
    filename: Union[str, os.PathLike],
    metadata: Optional[Dict[str, str]] = None,
) -> None:
    """
    Write a dictionary of numpy arrays to a file in the safetensors format.

    Args:
        tensor_dict (`Dict[str, np.ndarray]`):
            The incoming tensors. Tensors need to be contiguous and dense.
        filename (`str`, or `os.PathLike`):
            The filename we're saving into.
        metadata (`Dict[str, str]`, *optional*, defaults to `None`):
            Optional text only metadata you might want to save in your header.
            For instance it can be useful to specify more about the underlying
            tensors. This is purely informative and does not affect tensor loading.

    Returns:
        `None`

    Example:

    ```python
    from safetensors.numpy import save_file
    import numpy as np

    tensors = {"embedding": np.zeros((512, 1024)), "attention": np.zeros((256, 256))}
    save_file(tensors, "model.safetensors")
    ```
    """
    # Describe each array as the dtype/shape/data record handed to `serialize_file`.
    records = {}
    for name, array in tensor_dict.items():
        records[name] = {
            "dtype": array.dtype.name,
            "shape": array.shape,
            "data": _tobytes(array),
        }
    serialize_file(records, filename, metadata=metadata)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def load(data: bytes) -> Dict[str, np.ndarray]:
    """
    Decode the bytes of a safetensors file into numpy arrays.

    The content is passed through `deserialize` and the resulting entries are
    turned into arrays by `_view2np`.

    Args:
        data (`bytes`):
            The content of a safetensors file

    Returns:
        `Dict[str, np.ndarray]`: dictionary that contains name as key, value as `np.ndarray`

    Example:

    ```python
    from safetensors.numpy import load

    file_path = "./my_folder/bert.safetensors"
    with open(file_path, "rb") as f:
        data = f.read()

    loaded = load(data)
    ```
    """
    return _view2np(deserialize(data))
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def load_file(filename: Union[str, os.PathLike]) -> Dict[str, np.ndarray]:
    """
    Read every tensor of a safetensors file into a dictionary of numpy arrays.

    The file is opened with `safe_open(..., framework="np")` and tensors are
    fetched in the order given by `offset_keys()`.

    Args:
        filename (`str`, or `os.PathLike`):
            The name of the file which contains the tensors

    Returns:
        `Dict[str, np.ndarray]`: dictionary that contains name as key, value as `np.ndarray`

    Example:

    ```python
    from safetensors.numpy import load_file

    file_path = "./my_folder/bert.safetensors"
    loaded = load_file(file_path)
    ```
    """
    with safe_open(filename, framework="np") as handle:
        return {name: handle.get_tensor(name) for name in handle.offset_keys()}
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# Lookup table used by `_getdtype`: maps a dtype code string (the "dtype"
# field of a deserialized tensor entry) to the numpy type passed to
# `np.frombuffer`. Codes absent from this table raise `KeyError` on lookup.
_TYPES = {
    "F64": np.float64,
    "F32": np.float32,
    "F16": np.float16,
    "I64": np.int64,
    "U64": np.uint64,
    "I32": np.int32,
    "U32": np.uint32,
    "I16": np.int16,
    "U16": np.uint16,
    "I8": np.int8,
    "U8": np.uint8,
    "BOOL": bool,
    "C64": np.complex64,
}
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _getdtype(dtype_str: str) -> np.dtype:
    """
    Look up the numpy type registered in `_TYPES` for a dtype code string.

    Raises:
        KeyError: if `dtype_str` is not a key of `_TYPES`.
    """
    numpy_type = _TYPES[dtype_str]
    return numpy_type
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _view2np(safeview) -> Dict[str, np.ndarray]:
    """
    Build numpy arrays from an iterable of `(name, info)` pairs.

    Each `info` is indexed with the keys `"dtype"`, `"data"` and `"shape"`:
    the data buffer is interpreted with the dtype returned by `_getdtype` and
    reshaped to the given shape.
    """

    def _to_array(info):
        # Resolve the dtype before touching the buffer, as the original did.
        dtype = _getdtype(info["dtype"])
        return np.frombuffer(info["data"], dtype=dtype).reshape(info["shape"])

    return {name: _to_array(info) for name, info in safeview}
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _is_little_endian(tensor: np.ndarray) -> bool:
    """
    Tell whether `tensor`'s dtype byte order is little-endian.

    The decision is based on `tensor.dtype.byteorder`:
    `"<"` and `"|"` give `True`, `">"` gives `False`, and `"="` follows
    `sys.byteorder`.

    Raises:
        ValueError: if the byte order code is none of the four above.
    """
    order = tensor.dtype.byteorder
    if order == "=":
        # Native order: defer to the interpreter's own byte order.
        return sys.byteorder == "little"
    if order in ("|", "<"):
        return True
    if order == ">":
        return False
    raise ValueError(f"Unexpected byte order {order}")
|