bagofholding 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bagofholding/_version.py +2 -2
- bagofholding/content.py +15 -1
- bagofholding/h5/bag.py +20 -6
- bagofholding/h5/dtypes.py +3 -2
- bagofholding/h5/triebag.py +47 -11
- {bagofholding-0.1.6.dist-info → bagofholding-0.1.8.dist-info}/METADATA +4 -2
- {bagofholding-0.1.6.dist-info → bagofholding-0.1.8.dist-info}/RECORD +9 -9
- {bagofholding-0.1.6.dist-info → bagofholding-0.1.8.dist-info}/WHEEL +1 -1
- {bagofholding-0.1.6.dist-info → bagofholding-0.1.8.dist-info}/licenses/LICENSE +0 -0
bagofholding/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.1.6'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 1, 6)
|
|
31
|
+
__version__ = version = '0.1.8'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 1, 8)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
bagofholding/content.py
CHANGED
|
@@ -871,9 +871,23 @@ KNOWN_GROUP_MAP: dict[type, type[Group[Any, Any]]] = {
|
|
|
871
871
|
}
|
|
872
872
|
|
|
873
873
|
|
|
874
|
+
def is_simple_string(string: str) -> bool:
|
|
875
|
+
return string.isascii() and ("/" not in string)
|
|
876
|
+
|
|
877
|
+
|
|
878
|
+
def has_surrogates(s: str) -> bool:
|
|
879
|
+
try:
|
|
880
|
+
s.encode("utf-8")
|
|
881
|
+
return False
|
|
882
|
+
except UnicodeEncodeError:
|
|
883
|
+
return True
|
|
884
|
+
|
|
885
|
+
|
|
874
886
|
def get_group_content_class(obj: object) -> type[Group[Any, Any]] | None:
|
|
875
887
|
t = type(obj)
|
|
876
|
-
if t is dict and all(
|
|
888
|
+
if t is dict and all(
|
|
889
|
+
isinstance(k, str) and is_simple_string(k) for k in cast(dict[str, Any], obj)
|
|
890
|
+
):
|
|
877
891
|
return StrKeyDict
|
|
878
892
|
|
|
879
893
|
return KNOWN_GROUP_MAP.get(t)
|
bagofholding/h5/bag.py
CHANGED
|
@@ -8,7 +8,7 @@ import h5py
|
|
|
8
8
|
import numpy as np
|
|
9
9
|
|
|
10
10
|
from bagofholding.bag import Bag, BagInfo
|
|
11
|
-
from bagofholding.content import BespokeItem
|
|
11
|
+
from bagofholding.content import BespokeItem, has_surrogates
|
|
12
12
|
from bagofholding.exceptions import NotAGroupError
|
|
13
13
|
from bagofholding.h5.content import Array, ArrayPacker, ArrayType
|
|
14
14
|
from bagofholding.h5.context import HasH5FileContext
|
|
@@ -107,11 +107,21 @@ class H5Bag(Bag, HasH5FileContext, ArrayPacker):
|
|
|
107
107
|
self.file.create_dataset(path, data=h5py.Empty(dtype="f"))
|
|
108
108
|
|
|
109
109
|
def pack_string(self, obj: str, path: str) -> None:
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
110
|
+
if has_surrogates(obj):
|
|
111
|
+
encoded = obj.encode("utf-16", errors="surrogatepass")
|
|
112
|
+
self.file.create_dataset(path, data=np.void(encoded))
|
|
113
|
+
self.file[path].attrs["_surrogate_str"] = True
|
|
114
|
+
else:
|
|
115
|
+
self.file.create_dataset(
|
|
116
|
+
path, data=obj, dtype=h5py.string_dtype(encoding="utf-8")
|
|
117
|
+
)
|
|
113
118
|
|
|
114
119
|
def unpack_string(self, path: str) -> str:
|
|
120
|
+
if self.file[path].attrs.get("_surrogate_str", False):
|
|
121
|
+
return cast(
|
|
122
|
+
str,
|
|
123
|
+
self.file[path][()].tobytes().decode("utf-16", errors="surrogatepass"),
|
|
124
|
+
)
|
|
115
125
|
return cast(str, self._unpack_raw(path).decode("utf-8"))
|
|
116
126
|
|
|
117
127
|
def _pack_raw(self, obj: bytearray | bool | int | float, path: str) -> None:
|
|
@@ -146,7 +156,11 @@ class H5Bag(Bag, HasH5FileContext, ArrayPacker):
|
|
|
146
156
|
return complex(data[0], data[1])
|
|
147
157
|
|
|
148
158
|
def pack_bytes(self, obj: bytes, path: str) -> None:
|
|
149
|
-
|
|
159
|
+
if obj == b"":
|
|
160
|
+
special = h5py.special_dtype(vlen=bytes)
|
|
161
|
+
self.file.create_dataset(path, data=b"", dtype=special)
|
|
162
|
+
else:
|
|
163
|
+
self.file.create_dataset(path, data=np.void(obj))
|
|
150
164
|
|
|
151
165
|
def unpack_bytes(self, path: str) -> bytes:
|
|
152
166
|
return bytes(self._unpack_raw(path))
|
|
@@ -172,7 +186,7 @@ class H5Bag(Bag, HasH5FileContext, ArrayPacker):
|
|
|
172
186
|
def get_bespoke_content_class(
|
|
173
187
|
self, obj: object
|
|
174
188
|
) -> type[BespokeItem[Any, Self]] | None:
|
|
175
|
-
if type(obj) is np.ndarray and obj.dtype in H5PY_DTYPE_WHITELIST:
|
|
189
|
+
if type(obj) is np.ndarray and obj.dtype.type in H5PY_DTYPE_WHITELIST:
|
|
176
190
|
return cast(type[BespokeItem[Any, Self]], Array)
|
|
177
191
|
return None
|
|
178
192
|
|
bagofholding/h5/dtypes.py
CHANGED
bagofholding/h5/triebag.py
CHANGED
|
@@ -9,7 +9,7 @@ import numpy as np
|
|
|
9
9
|
import pygtrie
|
|
10
10
|
|
|
11
11
|
from bagofholding.bag import PATH_DELIMITER, Bag, BagInfo
|
|
12
|
-
from bagofholding.content import BespokeItem
|
|
12
|
+
from bagofholding.content import BespokeItem, has_surrogates
|
|
13
13
|
from bagofholding.h5.bag import H5Info
|
|
14
14
|
from bagofholding.h5.content import Array, ArrayPacker, ArrayType
|
|
15
15
|
from bagofholding.h5.context import HasH5FileContext
|
|
@@ -54,6 +54,8 @@ class TrieH5Bag(Bag, HasH5FileContext, ArrayPacker):
|
|
|
54
54
|
"array": 8,
|
|
55
55
|
"empty": 9,
|
|
56
56
|
"group": 10,
|
|
57
|
+
"empty_bytes": 11,
|
|
58
|
+
"surrogate_str": 12,
|
|
57
59
|
}
|
|
58
60
|
)
|
|
59
61
|
_field_delimiter: ClassVar[str] = "::"
|
|
@@ -95,7 +97,8 @@ class TrieH5Bag(Bag, HasH5FileContext, ArrayPacker):
|
|
|
95
97
|
list[bytes],
|
|
96
98
|
list[bytearray],
|
|
97
99
|
list[ArrayType],
|
|
98
|
-
|
|
100
|
+
list[bytes],
|
|
101
|
+
] = ([], [], [], [], [], [], [], [], [], [])
|
|
99
102
|
|
|
100
103
|
@property
|
|
101
104
|
def unpacked_trie(self) -> pygtrie.StringTrie:
|
|
@@ -133,16 +136,26 @@ class TrieH5Bag(Bag, HasH5FileContext, ArrayPacker):
|
|
|
133
136
|
self.file.create_dataset(
|
|
134
137
|
"complex_imag", data=np.array(self._packed[5], dtype=float)
|
|
135
138
|
)
|
|
136
|
-
self.file.
|
|
137
|
-
self.
|
|
138
|
-
"
|
|
139
|
-
|
|
139
|
+
bytes_group = self.file.create_group("bytes_data")
|
|
140
|
+
for i, b in enumerate(self._packed[6]):
|
|
141
|
+
bytes_group.create_dataset(f"i{i}", data=np.void(b))
|
|
142
|
+
bytearray_group = self.file.create_group("bytearrays")
|
|
143
|
+
for i, ba in enumerate(self._packed[7]):
|
|
144
|
+
bytearray_group.create_dataset(f"i{i}", data=ba)
|
|
145
|
+
# A reasonable alternative would be to group bytes and bytearrays by their
|
|
146
|
+
# length, creating fewer overall groups. Benefit depends on how heavily weighted
|
|
147
|
+
# unique lengths are to shared lengths, so for now keep it simple
|
|
140
148
|
array_group = self.file.create_group("ndarrays")
|
|
141
149
|
for i, ra in enumerate(self._packed[8]):
|
|
142
150
|
array_group.create_dataset(f"i{i}", data=ra)
|
|
143
151
|
# Empty doesn't need to be packed -- it's always None so the meta info is enough
|
|
144
152
|
# Groups don't need to be packed -- they are just holders so meta info is enough
|
|
145
153
|
|
|
154
|
+
# Some string encoding requires special treatment to play with h5py
|
|
155
|
+
surrogate_group = self.file.create_group("surrogate_strs")
|
|
156
|
+
for i, s in enumerate(self._packed[9]):
|
|
157
|
+
surrogate_group.create_dataset(f"i{i}", data=np.void(s))
|
|
158
|
+
|
|
146
159
|
self.close()
|
|
147
160
|
|
|
148
161
|
def _unpack_bag_info(self) -> BagInfo:
|
|
@@ -235,9 +248,21 @@ class TrieH5Bag(Bag, HasH5FileContext, ArrayPacker):
|
|
|
235
248
|
self._pack_trie(path, type_index, len(group) - 1)
|
|
236
249
|
|
|
237
250
|
def pack_string(self, obj: str, path: str) -> None:
|
|
238
|
-
|
|
251
|
+
if has_surrogates(obj):
|
|
252
|
+
encoded = obj.encode("utf-16", errors="surrogatepass")
|
|
253
|
+
type_index = self._index_map["surrogate_str"]
|
|
254
|
+
group = self._packed[9]
|
|
255
|
+
group.append(encoded)
|
|
256
|
+
self._pack_trie(path, type_index, len(group) - 1)
|
|
257
|
+
else:
|
|
258
|
+
self._pack_thing(obj, "str", path)
|
|
239
259
|
|
|
240
260
|
def unpack_string(self, path: str) -> str:
|
|
261
|
+
type_index, position_index = self._read_trie(path)
|
|
262
|
+
if self._index_map.inverse[type_index] == "surrogate_str":
|
|
263
|
+
with self:
|
|
264
|
+
data = self.file[f"surrogate_strs/i{position_index}"][()]
|
|
265
|
+
return cast(str, data.tobytes().decode("utf-16", errors="surrogatepass"))
|
|
241
266
|
return self.maybe_decode(cast(str, self._read_pathlike(path)))
|
|
242
267
|
|
|
243
268
|
def pack_bool(self, obj: bool, path: str) -> None:
|
|
@@ -277,16 +302,27 @@ class TrieH5Bag(Bag, HasH5FileContext, ArrayPacker):
|
|
|
277
302
|
return value
|
|
278
303
|
|
|
279
304
|
def pack_bytes(self, obj: bytes, path: str) -> None:
|
|
280
|
-
|
|
305
|
+
if obj == b"":
|
|
306
|
+
self._pack_trie(path, self._index_map["empty_bytes"], -1)
|
|
307
|
+
else:
|
|
308
|
+
self._pack_thing(obj, "bytes", path)
|
|
281
309
|
|
|
282
310
|
def unpack_bytes(self, path: str) -> bytes:
|
|
283
|
-
|
|
311
|
+
type_index, position_index = self._read_trie(path)
|
|
312
|
+
if self._index_map.inverse[type_index] == "empty_bytes":
|
|
313
|
+
return b""
|
|
314
|
+
else:
|
|
315
|
+
with self:
|
|
316
|
+
return bytes(self.file[f"bytes_data/i{position_index}"][()])
|
|
284
317
|
|
|
285
318
|
def pack_bytearray(self, obj: bytearray, path: str) -> None:
|
|
286
319
|
self._pack_thing(obj, "bytearray", path)
|
|
287
320
|
|
|
288
321
|
def unpack_bytearray(self, path: str) -> bytearray:
|
|
289
|
-
|
|
322
|
+
_, position_index = self._read_trie(path)
|
|
323
|
+
with self:
|
|
324
|
+
value = bytearray(self.file[f"bytearrays/i{position_index}"][()])
|
|
325
|
+
return value
|
|
290
326
|
|
|
291
327
|
def create_group(self, path: str) -> None:
|
|
292
328
|
self._pack_trie(path, self._index_map["group"], -1)
|
|
@@ -308,7 +344,7 @@ class TrieH5Bag(Bag, HasH5FileContext, ArrayPacker):
|
|
|
308
344
|
def get_bespoke_content_class(
|
|
309
345
|
self, obj: object
|
|
310
346
|
) -> type[BespokeItem[Any, Self]] | None:
|
|
311
|
-
if type(obj) is np.ndarray and obj.dtype in H5PY_DTYPE_WHITELIST:
|
|
347
|
+
if type(obj) is np.ndarray and obj.dtype.type in H5PY_DTYPE_WHITELIST:
|
|
312
348
|
return cast(type[BespokeItem[Any, Self]], Array)
|
|
313
349
|
return None
|
|
314
350
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bagofholding
|
|
3
|
-
Version: 0.1.6
|
|
3
|
+
Version: 0.1.8
|
|
4
4
|
Summary: bagofholding - browsable, partially-reloadable serialization for pickleable python objects.
|
|
5
5
|
Project-URL: Homepage, https://pyiron.org/
|
|
6
6
|
Project-URL: Documentation, https://bagofholding.readthedocs.io
|
|
@@ -48,9 +48,11 @@ Classifier: Topic :: Scientific/Engineering
|
|
|
48
48
|
Requires-Python: <3.15,>=3.11
|
|
49
49
|
Requires-Dist: bidict==0.23.1
|
|
50
50
|
Requires-Dist: h5py<3.16.0,>=3.12.1
|
|
51
|
-
Requires-Dist: numpy<2.
|
|
51
|
+
Requires-Dist: numpy<2.5.0,>=1.26.4
|
|
52
52
|
Requires-Dist: pygtrie<2.6.0,>=2.5.0
|
|
53
53
|
Requires-Dist: pyiron-snippets<2.0.0,>=1.0.0
|
|
54
|
+
Provides-Extra: dev
|
|
55
|
+
Requires-Dist: hypothesis==6.151.5; extra == 'dev'
|
|
54
56
|
Provides-Extra: widget
|
|
55
57
|
Requires-Dist: ipytree==0.2.2; extra == 'widget'
|
|
56
58
|
Requires-Dist: traitlets==5.14.3; extra == 'widget'
|
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
bagofholding/__init__.py,sha256=a8YLAb8TmNVGW-zwWOGBk1iUA4vJLYp92TklXCjV5sw,1314
|
|
2
|
-
bagofholding/_version.py,sha256=
|
|
2
|
+
bagofholding/_version.py,sha256=Zaz3s9gl_rzsS46-ymJOALojMxviW77EJq_agE8knLk,704
|
|
3
3
|
bagofholding/bag.py,sha256=i4ves0M_nMR0e7WT-KQQQZOdb29LEQwc0PmXwsBtSuU,8642
|
|
4
|
-
bagofholding/content.py,sha256=
|
|
4
|
+
bagofholding/content.py,sha256=0eN55ytBL-1YdYnGpuXpLqqS0m8_1qbPOO4VEEEtU5s,26617
|
|
5
5
|
bagofholding/exceptions.py,sha256=e6Vwc6ok7xXvdFy3k2hE7LytiVn1w0PaHH1Cwv06wu0,939
|
|
6
6
|
bagofholding/metadata.py,sha256=F7Yv6RB1p_BbW3dKSodx7AcMoUWUFX8zWd9iz6owwMg,6866
|
|
7
7
|
bagofholding/trie.py,sha256=rAWxR8hITeGZimSsAWp-_A2ymLNbBFLJXEtC_en__S8,6083
|
|
8
8
|
bagofholding/widget.py,sha256=DCsSKxZ855lt5dgoxhT4_YoHbyqygO7D2rJxPaKtyWg,3608
|
|
9
9
|
bagofholding/h5/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
-
bagofholding/h5/bag.py,sha256=
|
|
10
|
+
bagofholding/h5/bag.py,sha256=XEfGlAkKmE7vdRy_ImOApIMAovPw3f2s6mxh7Av8OTw,6445
|
|
11
11
|
bagofholding/h5/content.py,sha256=xAHfeMsm6c8ZgQ0RvkFrF1ooDA3BhMPSx9e6MfG8FLM,970
|
|
12
12
|
bagofholding/h5/context.py,sha256=JT70idJPUF0jEtIpZObTrQ8Xcf595dFSEGyss7dYKR8,1613
|
|
13
|
-
bagofholding/h5/dtypes.py,sha256=
|
|
14
|
-
bagofholding/h5/triebag.py,sha256=
|
|
15
|
-
bagofholding-0.1.
|
|
16
|
-
bagofholding-0.1.
|
|
17
|
-
bagofholding-0.1.
|
|
18
|
-
bagofholding-0.1.
|
|
13
|
+
bagofholding/h5/dtypes.py,sha256=trcwpQfShiarcN2Fg5HI2p18vakfUH74mms0_CeyzLc,1269
|
|
14
|
+
bagofholding/h5/triebag.py,sha256=ck8hCsupiJTYj9GUWTTaadivenst1XfM0i0bLxF14TY,13932
|
|
15
|
+
bagofholding-0.1.8.dist-info/METADATA,sha256=dsTzNb_rQVvjnvUSV5ZTB7tIQvnAo88bsQAU8x_6HPk,10085
|
|
16
|
+
bagofholding-0.1.8.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
17
|
+
bagofholding-0.1.8.dist-info/licenses/LICENSE,sha256=bNZX-3pFSnVGhRfng8SbI2FgEKLZb2nGO5jdsbwjEtk,1542
|
|
18
|
+
bagofholding-0.1.8.dist-info/RECORD,,
|
|
File without changes
|