safetensors 0.5.0rc0__tar.gz → 0.5.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of safetensors might be problematic. Click here for more details.
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/PKG-INFO +1 -1
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/Cargo.lock +6 -6
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/Cargo.toml +1 -1
- safetensors-0.5.2/bindings/python/py_src/safetensors/__init__.pyi +149 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/src/lib.rs +3 -1
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/stub.py +15 -4
- safetensors-0.5.2/py_src/safetensors/__init__.pyi +149 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/safetensors/Cargo.toml +9 -3
- safetensors-0.5.2/safetensors/src/lib.rs +43 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/safetensors/src/slice.rs +5 -5
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/safetensors/src/tensor.rs +26 -19
- safetensors-0.5.0rc0/bindings/python/py_src/safetensors/__init__.pyi +0 -73
- safetensors-0.5.0rc0/py_src/safetensors/__init__.pyi +0 -73
- safetensors-0.5.0rc0/safetensors/src/lib.rs +0 -5
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/.gitignore +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/MANIFEST.in +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/Makefile +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/README.md +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/benches/test_flax.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/benches/test_mlx.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/benches/test_paddle.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/benches/test_pt.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/benches/test_tf.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/convert.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/convert_all.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/fuzz.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/py_src/safetensors/__init__.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/py_src/safetensors/flax.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/py_src/safetensors/mlx.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/py_src/safetensors/numpy.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/py_src/safetensors/paddle.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/py_src/safetensors/py.typed +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/py_src/safetensors/tensorflow.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/py_src/safetensors/torch.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/setup.cfg +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/tests/data/__init__.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/tests/test_flax_comparison.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/tests/test_mlx_comparison.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/tests/test_paddle_comparison.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/tests/test_pt_comparison.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/tests/test_pt_model.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/tests/test_simple.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/bindings/python/tests/test_tf_comparison.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/py_src/safetensors/__init__.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/py_src/safetensors/flax.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/py_src/safetensors/mlx.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/py_src/safetensors/numpy.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/py_src/safetensors/paddle.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/py_src/safetensors/py.typed +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/py_src/safetensors/tensorflow.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/py_src/safetensors/torch.py +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/pyproject.toml +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/safetensors/LICENSE +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/safetensors/README.md +0 -0
- {safetensors-0.5.0rc0 → safetensors-0.5.2}/safetensors/benches/benchmark.rs +0 -0
|
@@ -163,7 +163,7 @@ checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
|
|
|
163
163
|
|
|
164
164
|
[[package]]
|
|
165
165
|
name = "safetensors"
|
|
166
|
-
version = "0.5.
|
|
166
|
+
version = "0.5.2"
|
|
167
167
|
dependencies = [
|
|
168
168
|
"serde",
|
|
169
169
|
"serde_json",
|
|
@@ -171,7 +171,7 @@ dependencies = [
|
|
|
171
171
|
|
|
172
172
|
[[package]]
|
|
173
173
|
name = "safetensors-python"
|
|
174
|
-
version = "0.5.
|
|
174
|
+
version = "0.5.2"
|
|
175
175
|
dependencies = [
|
|
176
176
|
"memmap2",
|
|
177
177
|
"pyo3",
|
|
@@ -201,9 +201,9 @@ dependencies = [
|
|
|
201
201
|
|
|
202
202
|
[[package]]
|
|
203
203
|
name = "serde_json"
|
|
204
|
-
version = "1.0.
|
|
204
|
+
version = "1.0.135"
|
|
205
205
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
206
|
-
checksum = "
|
|
206
|
+
checksum = "2b0d7ba2887406110130a978386c4e1befb98c674b4fba677954e4db976630d9"
|
|
207
207
|
dependencies = [
|
|
208
208
|
"itoa",
|
|
209
209
|
"memchr",
|
|
@@ -213,9 +213,9 @@ dependencies = [
|
|
|
213
213
|
|
|
214
214
|
[[package]]
|
|
215
215
|
name = "syn"
|
|
216
|
-
version = "2.0.
|
|
216
|
+
version = "2.0.95"
|
|
217
217
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
218
|
-
checksum = "
|
|
218
|
+
checksum = "46f71c0377baf4ef1cc3e3402ded576dccc315800fbc62dfc7fe04b009773b4a"
|
|
219
219
|
dependencies = [
|
|
220
220
|
"proc-macro2",
|
|
221
221
|
"quote",
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# Generated content DO NOT EDIT
|
|
2
|
+
@staticmethod
|
|
3
|
+
def deserialize(bytes):
|
|
4
|
+
"""
|
|
5
|
+
Opens a safetensors lazily and returns tensors as asked
|
|
6
|
+
|
|
7
|
+
Args:
|
|
8
|
+
data (`bytes`):
|
|
9
|
+
The byte content of a file
|
|
10
|
+
|
|
11
|
+
Returns:
|
|
12
|
+
(`List[str, Dict[str, Dict[str, any]]]`):
|
|
13
|
+
The deserialized content is like:
|
|
14
|
+
[("tensor_name", {"shape": [2, 3], "dtype": "F32", "data": b"\0\0.." }), (...)]
|
|
15
|
+
"""
|
|
16
|
+
pass
|
|
17
|
+
|
|
18
|
+
@staticmethod
|
|
19
|
+
def serialize(tensor_dict, metadata=None):
|
|
20
|
+
"""
|
|
21
|
+
Serializes raw data.
|
|
22
|
+
|
|
23
|
+
Args:
|
|
24
|
+
tensor_dict (`Dict[str, Dict[Any]]`):
|
|
25
|
+
The tensor dict is like:
|
|
26
|
+
{"tensor_name": {"dtype": "F32", "shape": [2, 3], "data": b"\0\0"}}
|
|
27
|
+
metadata (`Dict[str, str]`, *optional*):
|
|
28
|
+
The optional purely text annotations
|
|
29
|
+
|
|
30
|
+
Returns:
|
|
31
|
+
(`bytes`):
|
|
32
|
+
The serialized content.
|
|
33
|
+
"""
|
|
34
|
+
pass
|
|
35
|
+
|
|
36
|
+
@staticmethod
|
|
37
|
+
def serialize_file(tensor_dict, filename, metadata=None):
|
|
38
|
+
"""
|
|
39
|
+
Serializes raw data.
|
|
40
|
+
|
|
41
|
+
Args:
|
|
42
|
+
tensor_dict (`Dict[str, Dict[Any]]`):
|
|
43
|
+
The tensor dict is like:
|
|
44
|
+
{"tensor_name": {"dtype": "F32", "shape": [2, 3], "data": b"\0\0"}}
|
|
45
|
+
filename (`str`, or `os.PathLike`):
|
|
46
|
+
The name of the file to write into.
|
|
47
|
+
metadata (`Dict[str, str]`, *optional*):
|
|
48
|
+
The optional purely text annotations
|
|
49
|
+
|
|
50
|
+
Returns:
|
|
51
|
+
(`bytes`):
|
|
52
|
+
The serialized content.
|
|
53
|
+
"""
|
|
54
|
+
pass
|
|
55
|
+
|
|
56
|
+
class safe_open:
|
|
57
|
+
"""
|
|
58
|
+
Opens a safetensors lazily and returns tensors as asked
|
|
59
|
+
|
|
60
|
+
Args:
|
|
61
|
+
filename (`str`, or `os.PathLike`):
|
|
62
|
+
The filename to open
|
|
63
|
+
|
|
64
|
+
framework (`str`):
|
|
65
|
+
The framework you want you tensors in. Supported values:
|
|
66
|
+
`pt`, `tf`, `flax`, `numpy`.
|
|
67
|
+
|
|
68
|
+
device (`str`, defaults to `"cpu"`):
|
|
69
|
+
The device on which you want the tensors.
|
|
70
|
+
"""
|
|
71
|
+
|
|
72
|
+
def __init__(self, filename, framework, device=...):
|
|
73
|
+
pass
|
|
74
|
+
def __enter__(self):
|
|
75
|
+
"""
|
|
76
|
+
Start the context manager
|
|
77
|
+
"""
|
|
78
|
+
pass
|
|
79
|
+
def __exit__(self, _exc_type, _exc_value, _traceback):
|
|
80
|
+
"""
|
|
81
|
+
Exits the context manager
|
|
82
|
+
"""
|
|
83
|
+
pass
|
|
84
|
+
def get_slice(self, name):
|
|
85
|
+
"""
|
|
86
|
+
Returns a full slice view object
|
|
87
|
+
|
|
88
|
+
Args:
|
|
89
|
+
name (`str`):
|
|
90
|
+
The name of the tensor you want
|
|
91
|
+
|
|
92
|
+
Returns:
|
|
93
|
+
(`PySafeSlice`):
|
|
94
|
+
A dummy object you can slice into to get a real tensor
|
|
95
|
+
Example:
|
|
96
|
+
```python
|
|
97
|
+
from safetensors import safe_open
|
|
98
|
+
|
|
99
|
+
with safe_open("model.safetensors", framework="pt", device=0) as f:
|
|
100
|
+
tensor_part = f.get_slice("embedding")[:, ::8]
|
|
101
|
+
|
|
102
|
+
```
|
|
103
|
+
"""
|
|
104
|
+
pass
|
|
105
|
+
def get_tensor(self, name):
|
|
106
|
+
"""
|
|
107
|
+
Returns a full tensor
|
|
108
|
+
|
|
109
|
+
Args:
|
|
110
|
+
name (`str`):
|
|
111
|
+
The name of the tensor you want
|
|
112
|
+
|
|
113
|
+
Returns:
|
|
114
|
+
(`Tensor`):
|
|
115
|
+
The tensor in the framework you opened the file for.
|
|
116
|
+
|
|
117
|
+
Example:
|
|
118
|
+
```python
|
|
119
|
+
from safetensors import safe_open
|
|
120
|
+
|
|
121
|
+
with safe_open("model.safetensors", framework="pt", device=0) as f:
|
|
122
|
+
tensor = f.get_tensor("embedding")
|
|
123
|
+
|
|
124
|
+
```
|
|
125
|
+
"""
|
|
126
|
+
pass
|
|
127
|
+
def keys(self):
|
|
128
|
+
"""
|
|
129
|
+
Returns the names of the tensors in the file.
|
|
130
|
+
|
|
131
|
+
Returns:
|
|
132
|
+
(`List[str]`):
|
|
133
|
+
The name of the tensors contained in that file
|
|
134
|
+
"""
|
|
135
|
+
pass
|
|
136
|
+
def metadata(self):
|
|
137
|
+
"""
|
|
138
|
+
Return the special non tensor information in the header
|
|
139
|
+
|
|
140
|
+
Returns:
|
|
141
|
+
(`Dict[str, str]`):
|
|
142
|
+
The freeform metadata.
|
|
143
|
+
"""
|
|
144
|
+
pass
|
|
145
|
+
|
|
146
|
+
class SafetensorError(Exception):
|
|
147
|
+
"""
|
|
148
|
+
Custom Python Exception for Safetensor errors.
|
|
149
|
+
"""
|
|
@@ -406,7 +406,7 @@ impl Open {
|
|
|
406
406
|
|
|
407
407
|
// SAFETY: Mmap is used to prevent allocating in Rust
|
|
408
408
|
// before making a copy within Python.
|
|
409
|
-
let buffer = unsafe { MmapOptions::new().
|
|
409
|
+
let buffer = unsafe { MmapOptions::new().map_copy_read_only(&file)? };
|
|
410
410
|
|
|
411
411
|
let (n, metadata) = SafeTensors::read_metadata(&buffer).map_err(|e| {
|
|
412
412
|
SafetensorError::new_err(format!("Error while deserializing header: {e:?}"))
|
|
@@ -755,10 +755,12 @@ impl safe_open {
|
|
|
755
755
|
self.inner()?.get_slice(name)
|
|
756
756
|
}
|
|
757
757
|
|
|
758
|
+
/// Start the context manager
|
|
758
759
|
pub fn __enter__(slf: Py<Self>) -> Py<Self> {
|
|
759
760
|
slf
|
|
760
761
|
}
|
|
761
762
|
|
|
763
|
+
/// Exits the context manager
|
|
762
764
|
pub fn __exit__(&mut self, _exc_type: PyObject, _exc_value: PyObject, _traceback: PyObject) {
|
|
763
765
|
self.inner = None;
|
|
764
766
|
}
|
|
@@ -39,7 +39,11 @@ def member_sort(member):
|
|
|
39
39
|
def fn_predicate(obj):
|
|
40
40
|
value = inspect.ismethoddescriptor(obj) or inspect.isbuiltin(obj)
|
|
41
41
|
if value:
|
|
42
|
-
return
|
|
42
|
+
return (
|
|
43
|
+
obj.__doc__
|
|
44
|
+
and obj.__text_signature__
|
|
45
|
+
and (not obj.__name__.startswith("_") or obj.__name__ in {"__enter__", "__exit__"})
|
|
46
|
+
)
|
|
43
47
|
if inspect.isgetsetdescriptor(obj):
|
|
44
48
|
return obj.__doc__ and not obj.__name__.startswith("_")
|
|
45
49
|
return False
|
|
@@ -80,11 +84,12 @@ def pyi_file(obj, indent=""):
|
|
|
80
84
|
|
|
81
85
|
# Init
|
|
82
86
|
if obj.__text_signature__:
|
|
83
|
-
|
|
87
|
+
signature = obj.__text_signature__.replace("(", "(self, ")
|
|
88
|
+
body += f"{indent}def __init__{signature}:\n"
|
|
84
89
|
body += f"{indent+INDENT}pass\n"
|
|
85
90
|
body += "\n"
|
|
86
91
|
|
|
87
|
-
for
|
|
92
|
+
for name, fn in fns:
|
|
88
93
|
body += pyi_file(fn, indent=indent)
|
|
89
94
|
|
|
90
95
|
if not body:
|
|
@@ -130,6 +135,7 @@ def do_black(content, is_pyi):
|
|
|
130
135
|
experimental_string_processing=False,
|
|
131
136
|
)
|
|
132
137
|
try:
|
|
138
|
+
content = content.replace("$self", "self")
|
|
133
139
|
return black.format_file_contents(content, fast=True, mode=mode)
|
|
134
140
|
except black.NothingChanged:
|
|
135
141
|
return content
|
|
@@ -184,4 +190,9 @@ if __name__ == "__main__":
|
|
|
184
190
|
args = parser.parse_args()
|
|
185
191
|
import safetensors
|
|
186
192
|
|
|
187
|
-
write(
|
|
193
|
+
write(
|
|
194
|
+
safetensors._safetensors_rust,
|
|
195
|
+
"py_src/safetensors/",
|
|
196
|
+
"safetensors",
|
|
197
|
+
check=args.check,
|
|
198
|
+
)
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# Generated content DO NOT EDIT
|
|
2
|
+
@staticmethod
|
|
3
|
+
def deserialize(bytes):
|
|
4
|
+
"""
|
|
5
|
+
Opens a safetensors lazily and returns tensors as asked
|
|
6
|
+
|
|
7
|
+
Args:
|
|
8
|
+
data (`bytes`):
|
|
9
|
+
The byte content of a file
|
|
10
|
+
|
|
11
|
+
Returns:
|
|
12
|
+
(`List[str, Dict[str, Dict[str, any]]]`):
|
|
13
|
+
The deserialized content is like:
|
|
14
|
+
[("tensor_name", {"shape": [2, 3], "dtype": "F32", "data": b"\0\0.." }), (...)]
|
|
15
|
+
"""
|
|
16
|
+
pass
|
|
17
|
+
|
|
18
|
+
@staticmethod
|
|
19
|
+
def serialize(tensor_dict, metadata=None):
|
|
20
|
+
"""
|
|
21
|
+
Serializes raw data.
|
|
22
|
+
|
|
23
|
+
Args:
|
|
24
|
+
tensor_dict (`Dict[str, Dict[Any]]`):
|
|
25
|
+
The tensor dict is like:
|
|
26
|
+
{"tensor_name": {"dtype": "F32", "shape": [2, 3], "data": b"\0\0"}}
|
|
27
|
+
metadata (`Dict[str, str]`, *optional*):
|
|
28
|
+
The optional purely text annotations
|
|
29
|
+
|
|
30
|
+
Returns:
|
|
31
|
+
(`bytes`):
|
|
32
|
+
The serialized content.
|
|
33
|
+
"""
|
|
34
|
+
pass
|
|
35
|
+
|
|
36
|
+
@staticmethod
|
|
37
|
+
def serialize_file(tensor_dict, filename, metadata=None):
|
|
38
|
+
"""
|
|
39
|
+
Serializes raw data.
|
|
40
|
+
|
|
41
|
+
Args:
|
|
42
|
+
tensor_dict (`Dict[str, Dict[Any]]`):
|
|
43
|
+
The tensor dict is like:
|
|
44
|
+
{"tensor_name": {"dtype": "F32", "shape": [2, 3], "data": b"\0\0"}}
|
|
45
|
+
filename (`str`, or `os.PathLike`):
|
|
46
|
+
The name of the file to write into.
|
|
47
|
+
metadata (`Dict[str, str]`, *optional*):
|
|
48
|
+
The optional purely text annotations
|
|
49
|
+
|
|
50
|
+
Returns:
|
|
51
|
+
(`bytes`):
|
|
52
|
+
The serialized content.
|
|
53
|
+
"""
|
|
54
|
+
pass
|
|
55
|
+
|
|
56
|
+
class safe_open:
|
|
57
|
+
"""
|
|
58
|
+
Opens a safetensors lazily and returns tensors as asked
|
|
59
|
+
|
|
60
|
+
Args:
|
|
61
|
+
filename (`str`, or `os.PathLike`):
|
|
62
|
+
The filename to open
|
|
63
|
+
|
|
64
|
+
framework (`str`):
|
|
65
|
+
The framework you want you tensors in. Supported values:
|
|
66
|
+
`pt`, `tf`, `flax`, `numpy`.
|
|
67
|
+
|
|
68
|
+
device (`str`, defaults to `"cpu"`):
|
|
69
|
+
The device on which you want the tensors.
|
|
70
|
+
"""
|
|
71
|
+
|
|
72
|
+
def __init__(self, filename, framework, device=...):
|
|
73
|
+
pass
|
|
74
|
+
def __enter__(self):
|
|
75
|
+
"""
|
|
76
|
+
Start the context manager
|
|
77
|
+
"""
|
|
78
|
+
pass
|
|
79
|
+
def __exit__(self, _exc_type, _exc_value, _traceback):
|
|
80
|
+
"""
|
|
81
|
+
Exits the context manager
|
|
82
|
+
"""
|
|
83
|
+
pass
|
|
84
|
+
def get_slice(self, name):
|
|
85
|
+
"""
|
|
86
|
+
Returns a full slice view object
|
|
87
|
+
|
|
88
|
+
Args:
|
|
89
|
+
name (`str`):
|
|
90
|
+
The name of the tensor you want
|
|
91
|
+
|
|
92
|
+
Returns:
|
|
93
|
+
(`PySafeSlice`):
|
|
94
|
+
A dummy object you can slice into to get a real tensor
|
|
95
|
+
Example:
|
|
96
|
+
```python
|
|
97
|
+
from safetensors import safe_open
|
|
98
|
+
|
|
99
|
+
with safe_open("model.safetensors", framework="pt", device=0) as f:
|
|
100
|
+
tensor_part = f.get_slice("embedding")[:, ::8]
|
|
101
|
+
|
|
102
|
+
```
|
|
103
|
+
"""
|
|
104
|
+
pass
|
|
105
|
+
def get_tensor(self, name):
|
|
106
|
+
"""
|
|
107
|
+
Returns a full tensor
|
|
108
|
+
|
|
109
|
+
Args:
|
|
110
|
+
name (`str`):
|
|
111
|
+
The name of the tensor you want
|
|
112
|
+
|
|
113
|
+
Returns:
|
|
114
|
+
(`Tensor`):
|
|
115
|
+
The tensor in the framework you opened the file for.
|
|
116
|
+
|
|
117
|
+
Example:
|
|
118
|
+
```python
|
|
119
|
+
from safetensors import safe_open
|
|
120
|
+
|
|
121
|
+
with safe_open("model.safetensors", framework="pt", device=0) as f:
|
|
122
|
+
tensor = f.get_tensor("embedding")
|
|
123
|
+
|
|
124
|
+
```
|
|
125
|
+
"""
|
|
126
|
+
pass
|
|
127
|
+
def keys(self):
|
|
128
|
+
"""
|
|
129
|
+
Returns the names of the tensors in the file.
|
|
130
|
+
|
|
131
|
+
Returns:
|
|
132
|
+
(`List[str]`):
|
|
133
|
+
The name of the tensors contained in that file
|
|
134
|
+
"""
|
|
135
|
+
pass
|
|
136
|
+
def metadata(self):
|
|
137
|
+
"""
|
|
138
|
+
Return the special non tensor information in the header
|
|
139
|
+
|
|
140
|
+
Returns:
|
|
141
|
+
(`Dict[str, str]`):
|
|
142
|
+
The freeform metadata.
|
|
143
|
+
"""
|
|
144
|
+
pass
|
|
145
|
+
|
|
146
|
+
class SafetensorError(Exception):
|
|
147
|
+
"""
|
|
148
|
+
Custom Python Exception for Safetensor errors.
|
|
149
|
+
"""
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "safetensors"
|
|
3
|
-
version = "0.5.
|
|
3
|
+
version = "0.5.2"
|
|
4
4
|
edition = "2021"
|
|
5
5
|
homepage = "https://github.com/huggingface/safetensors"
|
|
6
6
|
repository = "https://github.com/huggingface/safetensors"
|
|
@@ -21,14 +21,20 @@ exclude = [ "rust-toolchain", "target/*", "Cargo.lock"]
|
|
|
21
21
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
|
22
22
|
|
|
23
23
|
[dependencies]
|
|
24
|
-
|
|
25
|
-
|
|
24
|
+
hashbrown = { version = "0.15.2", features = ["serde"], optional = true }
|
|
25
|
+
serde = { version = "1.0", default-features = false, features = ["derive"] }
|
|
26
|
+
serde_json = { version = "1.0", default-features = false }
|
|
26
27
|
|
|
27
28
|
[dev-dependencies]
|
|
28
29
|
criterion = "0.5"
|
|
29
30
|
memmap2 = "0.9"
|
|
30
31
|
proptest = "1.4"
|
|
31
32
|
|
|
33
|
+
[features]
|
|
34
|
+
default = ["std"]
|
|
35
|
+
std = ["serde/default", "serde_json/default"]
|
|
36
|
+
alloc = ["serde/alloc", "serde_json/alloc", "hashbrown"]
|
|
37
|
+
|
|
32
38
|
[[bench]]
|
|
33
39
|
name = "benchmark"
|
|
34
40
|
harness = false
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
#![deny(missing_docs)]
|
|
2
|
+
#![doc = include_str!("../README.md")]
|
|
3
|
+
#![cfg_attr(not(feature = "std"), no_std)]
|
|
4
|
+
pub mod slice;
|
|
5
|
+
pub mod tensor;
|
|
6
|
+
/// serialize_to_file only valid in std
|
|
7
|
+
#[cfg(feature = "std")]
|
|
8
|
+
pub use tensor::serialize_to_file;
|
|
9
|
+
pub use tensor::{serialize, Dtype, SafeTensorError, SafeTensors, View};
|
|
10
|
+
|
|
11
|
+
#[cfg(feature = "alloc")]
|
|
12
|
+
#[macro_use]
|
|
13
|
+
extern crate alloc;
|
|
14
|
+
|
|
15
|
+
#[cfg(all(feature = "std", feature = "alloc"))]
|
|
16
|
+
compile_error!("must choose either the `std` or `alloc` feature, but not both.");
|
|
17
|
+
#[cfg(all(not(feature = "std"), not(feature = "alloc")))]
|
|
18
|
+
compile_error!("must choose either the `std` or `alloc` feature");
|
|
19
|
+
|
|
20
|
+
/// A facade around all the types we need from the `std`, `core`, and `alloc`
|
|
21
|
+
/// crates. This avoids elaborate import wrangling having to happen in every
|
|
22
|
+
/// module.
|
|
23
|
+
mod lib {
|
|
24
|
+
#[cfg(not(feature = "std"))]
|
|
25
|
+
mod no_stds {
|
|
26
|
+
pub use alloc::borrow::Cow;
|
|
27
|
+
pub use alloc::string::{String, ToString};
|
|
28
|
+
pub use alloc::vec::Vec;
|
|
29
|
+
pub use hashbrown::HashMap;
|
|
30
|
+
}
|
|
31
|
+
#[cfg(feature = "std")]
|
|
32
|
+
mod stds {
|
|
33
|
+
pub use std::borrow::Cow;
|
|
34
|
+
pub use std::collections::HashMap;
|
|
35
|
+
pub use std::string::{String, ToString};
|
|
36
|
+
pub use std::vec::Vec;
|
|
37
|
+
}
|
|
38
|
+
/// choose std or no_std to export by feature flag
|
|
39
|
+
#[cfg(not(feature = "std"))]
|
|
40
|
+
pub use no_stds::*;
|
|
41
|
+
#[cfg(feature = "std")]
|
|
42
|
+
pub use stds::*;
|
|
43
|
+
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
//! Module handling lazy loading via iterating on slices on the original buffer.
|
|
2
|
+
use crate::lib::{String, ToString, Vec};
|
|
2
3
|
use crate::tensor::TensorView;
|
|
3
|
-
use
|
|
4
|
-
use std::ops::{
|
|
4
|
+
use core::ops::{
|
|
5
5
|
Bound, Range, RangeBounds, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive,
|
|
6
6
|
};
|
|
7
7
|
|
|
@@ -40,8 +40,8 @@ fn display_bound(bound: &Bound<usize>) -> String {
|
|
|
40
40
|
}
|
|
41
41
|
|
|
42
42
|
/// Intended for Python users mostly or at least for its conventions
|
|
43
|
-
impl fmt::Display for TensorIndexer {
|
|
44
|
-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
43
|
+
impl core::fmt::Display for TensorIndexer {
|
|
44
|
+
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
|
45
45
|
match self {
|
|
46
46
|
TensorIndexer::Select(n) => {
|
|
47
47
|
write!(f, "{n}")
|
|
@@ -77,7 +77,7 @@ macro_rules! impl_from_range {
|
|
|
77
77
|
($range_type:ty) => {
|
|
78
78
|
impl From<$range_type> for TensorIndexer {
|
|
79
79
|
fn from(range: $range_type) -> Self {
|
|
80
|
-
use
|
|
80
|
+
use core::ops::Bound::*;
|
|
81
81
|
|
|
82
82
|
let start = match range.start_bound() {
|
|
83
83
|
Included(idx) => Included(*idx),
|
|
@@ -1,11 +1,9 @@
|
|
|
1
1
|
//! Module Containing the most important structures
|
|
2
|
+
use crate::lib::{Cow, HashMap, String, ToString, Vec};
|
|
2
3
|
use crate::slice::{InvalidSlice, SliceIterator, TensorIndexer};
|
|
3
4
|
use serde::{ser::SerializeMap, Deserialize, Deserializer, Serialize, Serializer};
|
|
4
|
-
|
|
5
|
-
use std::
|
|
6
|
-
use std::fs::File;
|
|
7
|
-
use std::io::{BufWriter, Write};
|
|
8
|
-
use std::path::Path;
|
|
5
|
+
#[cfg(feature = "std")]
|
|
6
|
+
use std::io::Write;
|
|
9
7
|
|
|
10
8
|
const MAX_HEADER_SIZE: usize = 100_000_000;
|
|
11
9
|
|
|
@@ -32,6 +30,7 @@ pub enum SafeTensorError {
|
|
|
32
30
|
/// The offsets declared for tensor with name `String` in the header are invalid
|
|
33
31
|
InvalidOffset(String),
|
|
34
32
|
/// IoError
|
|
33
|
+
#[cfg(feature = "std")]
|
|
35
34
|
IoError(std::io::Error),
|
|
36
35
|
/// JSON error
|
|
37
36
|
JsonError(serde_json::Error),
|
|
@@ -46,6 +45,7 @@ pub enum SafeTensorError {
|
|
|
46
45
|
ValidationOverflow,
|
|
47
46
|
}
|
|
48
47
|
|
|
48
|
+
#[cfg(feature = "std")]
|
|
49
49
|
impl From<std::io::Error> for SafeTensorError {
|
|
50
50
|
fn from(error: std::io::Error) -> SafeTensorError {
|
|
51
51
|
SafeTensorError::IoError(error)
|
|
@@ -58,13 +58,13 @@ impl From<serde_json::Error> for SafeTensorError {
|
|
|
58
58
|
}
|
|
59
59
|
}
|
|
60
60
|
|
|
61
|
-
impl
|
|
62
|
-
fn fmt(&self, f: &mut
|
|
61
|
+
impl core::fmt::Display for SafeTensorError {
|
|
62
|
+
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
|
63
63
|
write!(f, "{self:?}")
|
|
64
64
|
}
|
|
65
65
|
}
|
|
66
66
|
|
|
67
|
-
impl
|
|
67
|
+
impl core::error::Error for SafeTensorError {}
|
|
68
68
|
|
|
69
69
|
struct PreparedData {
|
|
70
70
|
n: u64,
|
|
@@ -164,7 +164,7 @@ pub trait View {
|
|
|
164
164
|
fn data_len(&self) -> usize;
|
|
165
165
|
}
|
|
166
166
|
|
|
167
|
-
fn prepare<S: AsRef<str> + Ord +
|
|
167
|
+
fn prepare<S: AsRef<str> + Ord + core::fmt::Display, V: View, I: IntoIterator<Item = (S, V)>>(
|
|
168
168
|
data: I,
|
|
169
169
|
data_info: &Option<HashMap<String, String>>,
|
|
170
170
|
// ) -> Result<(Metadata, Vec<&'hash TensorView<'data>>, usize), SafeTensorError> {
|
|
@@ -212,7 +212,7 @@ fn prepare<S: AsRef<str> + Ord + std::fmt::Display, V: View, I: IntoIterator<Ite
|
|
|
212
212
|
|
|
213
213
|
/// Serialize to an owned byte buffer the dictionnary of tensors.
|
|
214
214
|
pub fn serialize<
|
|
215
|
-
S: AsRef<str> + Ord +
|
|
215
|
+
S: AsRef<str> + Ord + core::fmt::Display,
|
|
216
216
|
V: View,
|
|
217
217
|
I: IntoIterator<Item = (S, V)>,
|
|
218
218
|
>(
|
|
@@ -240,14 +240,15 @@ pub fn serialize<
|
|
|
240
240
|
/// Serialize to a regular file the dictionnary of tensors.
|
|
241
241
|
/// Writing directly to file reduces the need to allocate the whole amount to
|
|
242
242
|
/// memory.
|
|
243
|
+
#[cfg(feature = "std")]
|
|
243
244
|
pub fn serialize_to_file<
|
|
244
|
-
S: AsRef<str> + Ord +
|
|
245
|
+
S: AsRef<str> + Ord + core::fmt::Display,
|
|
245
246
|
V: View,
|
|
246
247
|
I: IntoIterator<Item = (S, V)>,
|
|
247
248
|
>(
|
|
248
249
|
data: I,
|
|
249
250
|
data_info: &Option<HashMap<String, String>>,
|
|
250
|
-
filename: &Path,
|
|
251
|
+
filename: &std::path::Path,
|
|
251
252
|
) -> Result<(), SafeTensorError> {
|
|
252
253
|
let (
|
|
253
254
|
PreparedData {
|
|
@@ -255,7 +256,7 @@ pub fn serialize_to_file<
|
|
|
255
256
|
},
|
|
256
257
|
tensors,
|
|
257
258
|
) = prepare(data, data_info)?;
|
|
258
|
-
let mut f = BufWriter::new(File::create(filename)?);
|
|
259
|
+
let mut f = std::io::BufWriter::new(std::fs::File::create(filename)?);
|
|
259
260
|
f.write_all(n.to_le_bytes().as_ref())?;
|
|
260
261
|
f.write_all(&header_bytes)?;
|
|
261
262
|
for tensor in tensors {
|
|
@@ -303,7 +304,7 @@ impl<'data> SafeTensors<'data> {
|
|
|
303
304
|
return Err(SafeTensorError::InvalidHeaderLength);
|
|
304
305
|
}
|
|
305
306
|
let string =
|
|
306
|
-
|
|
307
|
+
core::str::from_utf8(&buffer[8..stop]).map_err(|_| SafeTensorError::InvalidHeader)?;
|
|
307
308
|
// Assert the string starts with {
|
|
308
309
|
// NOTE: Add when we move to 0.4.0
|
|
309
310
|
// if !string.starts_with('{') {
|
|
@@ -719,6 +720,9 @@ mod tests {
|
|
|
719
720
|
use super::*;
|
|
720
721
|
use crate::slice::IndexOp;
|
|
721
722
|
use proptest::prelude::*;
|
|
723
|
+
#[cfg(not(feature = "std"))]
|
|
724
|
+
extern crate std;
|
|
725
|
+
use std::io::Write;
|
|
722
726
|
|
|
723
727
|
const MAX_DIMENSION: usize = 8;
|
|
724
728
|
const MAX_SIZE: usize = 8;
|
|
@@ -1021,10 +1025,13 @@ mod tests {
|
|
|
1021
1025
|
std::fs::remove_file(&filename).unwrap();
|
|
1022
1026
|
|
|
1023
1027
|
// File api
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
+
#[cfg(feature = "std")]
|
|
1029
|
+
{
|
|
1030
|
+
serialize_to_file(&metadata, &None, std::path::Path::new(&filename)).unwrap();
|
|
1031
|
+
let raw = std::fs::read(&filename).unwrap();
|
|
1032
|
+
let _deserialized = SafeTensors::deserialize(&raw).unwrap();
|
|
1033
|
+
std::fs::remove_file(&filename).unwrap();
|
|
1034
|
+
}
|
|
1028
1035
|
}
|
|
1029
1036
|
|
|
1030
1037
|
#[test]
|
|
@@ -1097,7 +1104,7 @@ mod tests {
|
|
|
1097
1104
|
let n = serialized.len();
|
|
1098
1105
|
|
|
1099
1106
|
let filename = "out.safetensors";
|
|
1100
|
-
let mut f = BufWriter::new(File::create(filename).unwrap());
|
|
1107
|
+
let mut f = std::io::BufWriter::new(std::fs::File::create(filename).unwrap());
|
|
1101
1108
|
f.write_all(n.to_le_bytes().as_ref()).unwrap();
|
|
1102
1109
|
f.write_all(serialized).unwrap();
|
|
1103
1110
|
f.write_all(b"\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0").unwrap();
|
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
# Generated content DO NOT EDIT
|
|
2
|
-
@staticmethod
|
|
3
|
-
def deserialize(bytes):
|
|
4
|
-
"""
|
|
5
|
-
Opens a safetensors lazily and returns tensors as asked
|
|
6
|
-
|
|
7
|
-
Args:
|
|
8
|
-
data (:obj:`bytes`):
|
|
9
|
-
The byte content of a file
|
|
10
|
-
|
|
11
|
-
Returns:
|
|
12
|
-
(:obj:`List[str, Dict[str, Dict[str, any]]]`):
|
|
13
|
-
The deserialized content is like:
|
|
14
|
-
[("tensor_name", {"shape": [2, 3], "dtype": "F32", "data": b"\0\0.." }), (...)]
|
|
15
|
-
"""
|
|
16
|
-
pass
|
|
17
|
-
|
|
18
|
-
@staticmethod
|
|
19
|
-
def serialize(tensor_dict, metadata=None):
|
|
20
|
-
"""
|
|
21
|
-
Serializes raw data.
|
|
22
|
-
|
|
23
|
-
Args:
|
|
24
|
-
tensor_dict (:obj:`Dict[str, Dict[Any]]`):
|
|
25
|
-
The tensor dict is like:
|
|
26
|
-
{"tensor_name": {"dtype": "F32", "shape": [2, 3], "data": b"\0\0"}}
|
|
27
|
-
metadata (:obj:`Dict[str, str]`, *optional*):
|
|
28
|
-
The optional purely text annotations
|
|
29
|
-
|
|
30
|
-
Returns:
|
|
31
|
-
(:obj:`bytes`):
|
|
32
|
-
The serialized content.
|
|
33
|
-
"""
|
|
34
|
-
pass
|
|
35
|
-
|
|
36
|
-
@staticmethod
|
|
37
|
-
def serialize_file(tensor_dict, filename, metadata=None):
|
|
38
|
-
"""
|
|
39
|
-
Serializes raw data.
|
|
40
|
-
|
|
41
|
-
Args:
|
|
42
|
-
tensor_dict (:obj:`Dict[str, Dict[Any]]`):
|
|
43
|
-
The tensor dict is like:
|
|
44
|
-
{"tensor_name": {"dtype": "F32", "shape": [2, 3], "data": b"\0\0"}}
|
|
45
|
-
filename (:obj:`str`):
|
|
46
|
-
The name of the file to write into.
|
|
47
|
-
metadata (:obj:`Dict[str, str]`, *optional*):
|
|
48
|
-
The optional purely text annotations
|
|
49
|
-
|
|
50
|
-
Returns:
|
|
51
|
-
(:obj:`bytes`):
|
|
52
|
-
The serialized content.
|
|
53
|
-
"""
|
|
54
|
-
pass
|
|
55
|
-
|
|
56
|
-
class safe_open:
|
|
57
|
-
"""
|
|
58
|
-
Opens a safetensors lazily and returns tensors as asked
|
|
59
|
-
|
|
60
|
-
Args:
|
|
61
|
-
filename (:obj:`str`):
|
|
62
|
-
The filename to open
|
|
63
|
-
|
|
64
|
-
framework (:obj:`str`):
|
|
65
|
-
The framework you want your tensors in. Supported values:
|
|
66
|
-
`pt`, `tf`, `flax`, `numpy`.
|
|
67
|
-
|
|
68
|
-
device (:obj:`str`, defaults to :obj:`"cpu"`):
|
|
69
|
-
The device on which you want the tensors.
|
|
70
|
-
"""
|
|
71
|
-
|
|
72
|
-
def __init__(self, filename, framework, device="cpu"):
|
|
73
|
-
pass
|
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
# Generated content DO NOT EDIT
|
|
2
|
-
@staticmethod
|
|
3
|
-
def deserialize(bytes):
|
|
4
|
-
"""
|
|
5
|
-
Opens a safetensors lazily and returns tensors as asked
|
|
6
|
-
|
|
7
|
-
Args:
|
|
8
|
-
data (:obj:`bytes`):
|
|
9
|
-
The byte content of a file
|
|
10
|
-
|
|
11
|
-
Returns:
|
|
12
|
-
(:obj:`List[str, Dict[str, Dict[str, any]]]`):
|
|
13
|
-
The deserialized content is like:
|
|
14
|
-
[("tensor_name", {"shape": [2, 3], "dtype": "F32", "data": b"\0\0.." }), (...)]
|
|
15
|
-
"""
|
|
16
|
-
pass
|
|
17
|
-
|
|
18
|
-
@staticmethod
|
|
19
|
-
def serialize(tensor_dict, metadata=None):
|
|
20
|
-
"""
|
|
21
|
-
Serializes raw data.
|
|
22
|
-
|
|
23
|
-
Args:
|
|
24
|
-
tensor_dict (:obj:`Dict[str, Dict[Any]]`):
|
|
25
|
-
The tensor dict is like:
|
|
26
|
-
{"tensor_name": {"dtype": "F32", "shape": [2, 3], "data": b"\0\0"}}
|
|
27
|
-
metadata (:obj:`Dict[str, str]`, *optional*):
|
|
28
|
-
The optional purely text annotations
|
|
29
|
-
|
|
30
|
-
Returns:
|
|
31
|
-
(:obj:`bytes`):
|
|
32
|
-
The serialized content.
|
|
33
|
-
"""
|
|
34
|
-
pass
|
|
35
|
-
|
|
36
|
-
@staticmethod
|
|
37
|
-
def serialize_file(tensor_dict, filename, metadata=None):
|
|
38
|
-
"""
|
|
39
|
-
Serializes raw data.
|
|
40
|
-
|
|
41
|
-
Args:
|
|
42
|
-
tensor_dict (:obj:`Dict[str, Dict[Any]]`):
|
|
43
|
-
The tensor dict is like:
|
|
44
|
-
{"tensor_name": {"dtype": "F32", "shape": [2, 3], "data": b"\0\0"}}
|
|
45
|
-
filename (:obj:`str`):
|
|
46
|
-
The name of the file to write into.
|
|
47
|
-
metadata (:obj:`Dict[str, str]`, *optional*):
|
|
48
|
-
The optional purely text annotations
|
|
49
|
-
|
|
50
|
-
Returns:
|
|
51
|
-
(:obj:`bytes`):
|
|
52
|
-
The serialized content.
|
|
53
|
-
"""
|
|
54
|
-
pass
|
|
55
|
-
|
|
56
|
-
class safe_open:
|
|
57
|
-
"""
|
|
58
|
-
Opens a safetensors lazily and returns tensors as asked
|
|
59
|
-
|
|
60
|
-
Args:
|
|
61
|
-
filename (:obj:`str`):
|
|
62
|
-
The filename to open
|
|
63
|
-
|
|
64
|
-
framework (:obj:`str`):
|
|
65
|
-
The framework you want your tensors in. Supported values:
|
|
66
|
-
`pt`, `tf`, `flax`, `numpy`.
|
|
67
|
-
|
|
68
|
-
device (:obj:`str`, defaults to :obj:`"cpu"`):
|
|
69
|
-
The device on which you want the tensors.
|
|
70
|
-
"""
|
|
71
|
-
|
|
72
|
-
def __init__(self, filename, framework, device="cpu"):
|
|
73
|
-
pass
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|