bagofholding 0.1.6__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
bagofholding/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.1.6'
32
- __version_tuple__ = version_tuple = (0, 1, 6)
31
+ __version__ = version = '0.1.8'
32
+ __version_tuple__ = version_tuple = (0, 1, 8)
33
33
 
34
34
  __commit_id__ = commit_id = None
bagofholding/content.py CHANGED
@@ -871,9 +871,23 @@ KNOWN_GROUP_MAP: dict[type, type[Group[Any, Any]]] = {
871
871
  }
872
872
 
873
873
 
874
+ def is_simple_string(string: str) -> bool:
875
+ return string.isascii() and ("/" not in string)
876
+
877
+
878
+ def has_surrogates(s: str) -> bool:
879
+ try:
880
+ s.encode("utf-8")
881
+ return False
882
+ except UnicodeEncodeError:
883
+ return True
884
+
885
+
874
886
  def get_group_content_class(obj: object) -> type[Group[Any, Any]] | None:
875
887
  t = type(obj)
876
- if t is dict and all(isinstance(k, str) for k in cast(dict[str, Any], obj)):
888
+ if t is dict and all(
889
+ isinstance(k, str) and is_simple_string(k) for k in cast(dict[str, Any], obj)
890
+ ):
877
891
  return StrKeyDict
878
892
 
879
893
  return KNOWN_GROUP_MAP.get(t)
bagofholding/h5/bag.py CHANGED
@@ -8,7 +8,7 @@ import h5py
8
8
  import numpy as np
9
9
 
10
10
  from bagofholding.bag import Bag, BagInfo
11
- from bagofholding.content import BespokeItem
11
+ from bagofholding.content import BespokeItem, has_surrogates
12
12
  from bagofholding.exceptions import NotAGroupError
13
13
  from bagofholding.h5.content import Array, ArrayPacker, ArrayType
14
14
  from bagofholding.h5.context import HasH5FileContext
@@ -107,11 +107,21 @@ class H5Bag(Bag, HasH5FileContext, ArrayPacker):
107
107
  self.file.create_dataset(path, data=h5py.Empty(dtype="f"))
108
108
 
109
109
  def pack_string(self, obj: str, path: str) -> None:
110
- self.file.create_dataset(
111
- path, data=obj, dtype=h5py.string_dtype(encoding="utf-8")
112
- )
110
+ if has_surrogates(obj):
111
+ encoded = obj.encode("utf-16", errors="surrogatepass")
112
+ self.file.create_dataset(path, data=np.void(encoded))
113
+ self.file[path].attrs["_surrogate_str"] = True
114
+ else:
115
+ self.file.create_dataset(
116
+ path, data=obj, dtype=h5py.string_dtype(encoding="utf-8")
117
+ )
113
118
 
114
119
  def unpack_string(self, path: str) -> str:
120
+ if self.file[path].attrs.get("_surrogate_str", False):
121
+ return cast(
122
+ str,
123
+ self.file[path][()].tobytes().decode("utf-16", errors="surrogatepass"),
124
+ )
115
125
  return cast(str, self._unpack_raw(path).decode("utf-8"))
116
126
 
117
127
  def _pack_raw(self, obj: bytearray | bool | int | float, path: str) -> None:
@@ -146,7 +156,11 @@ class H5Bag(Bag, HasH5FileContext, ArrayPacker):
146
156
  return complex(data[0], data[1])
147
157
 
148
158
  def pack_bytes(self, obj: bytes, path: str) -> None:
149
- self.file.create_dataset(path, data=np.void(obj))
159
+ if obj == b"":
160
+ special = h5py.special_dtype(vlen=bytes)
161
+ self.file.create_dataset(path, data=b"", dtype=special)
162
+ else:
163
+ self.file.create_dataset(path, data=np.void(obj))
150
164
 
151
165
  def unpack_bytes(self, path: str) -> bytes:
152
166
  return bytes(self._unpack_raw(path))
@@ -172,7 +186,7 @@ class H5Bag(Bag, HasH5FileContext, ArrayPacker):
172
186
  def get_bespoke_content_class(
173
187
  self, obj: object
174
188
  ) -> type[BespokeItem[Any, Self]] | None:
175
- if type(obj) is np.ndarray and obj.dtype in H5PY_DTYPE_WHITELIST:
189
+ if type(obj) is np.ndarray and obj.dtype.type in H5PY_DTYPE_WHITELIST:
176
190
  return cast(type[BespokeItem[Any, Self]], Array)
177
191
  return None
178
192
 
bagofholding/h5/dtypes.py CHANGED
@@ -23,8 +23,9 @@ H5PY_DTYPE_WHITELIST = (
23
23
  np.complex64,
24
24
  np.complex128,
25
25
  np.bool_,
26
- np.bytes_,
27
- np.str_,
26
+ # np.bytes_,
27
+ # np.str_,
28
+ # h5py needs special handling for byte and string info
28
29
  )
29
30
 
30
31
 
bagofholding/h5/triebag.py CHANGED
@@ -9,7 +9,7 @@ import numpy as np
9
9
  import pygtrie
10
10
 
11
11
  from bagofholding.bag import PATH_DELIMITER, Bag, BagInfo
12
- from bagofholding.content import BespokeItem
12
+ from bagofholding.content import BespokeItem, has_surrogates
13
13
  from bagofholding.h5.bag import H5Info
14
14
  from bagofholding.h5.content import Array, ArrayPacker, ArrayType
15
15
  from bagofholding.h5.context import HasH5FileContext
@@ -54,6 +54,8 @@ class TrieH5Bag(Bag, HasH5FileContext, ArrayPacker):
54
54
  "array": 8,
55
55
  "empty": 9,
56
56
  "group": 10,
57
+ "empty_bytes": 11,
58
+ "surrogate_str": 12,
57
59
  }
58
60
  )
59
61
  _field_delimiter: ClassVar[str] = "::"
@@ -95,7 +97,8 @@ class TrieH5Bag(Bag, HasH5FileContext, ArrayPacker):
95
97
  list[bytes],
96
98
  list[bytearray],
97
99
  list[ArrayType],
98
- ] = ([], [], [], [], [], [], [], [], [])
100
+ list[bytes],
101
+ ] = ([], [], [], [], [], [], [], [], [], [])
99
102
 
100
103
  @property
101
104
  def unpacked_trie(self) -> pygtrie.StringTrie:
@@ -133,16 +136,26 @@ class TrieH5Bag(Bag, HasH5FileContext, ArrayPacker):
133
136
  self.file.create_dataset(
134
137
  "complex_imag", data=np.array(self._packed[5], dtype=float)
135
138
  )
136
- self.file.create_dataset("bytes", data=np.array(self._packed[6]))
137
- self.file.create_dataset(
138
- "bytearray", data=np.array(self._packed[7])
139
- ) # dtype=bytearray
139
+ bytes_group = self.file.create_group("bytes_data")
140
+ for i, b in enumerate(self._packed[6]):
141
+ bytes_group.create_dataset(f"i{i}", data=np.void(b))
142
+ bytearray_group = self.file.create_group("bytearrays")
143
+ for i, ba in enumerate(self._packed[7]):
144
+ bytearray_group.create_dataset(f"i{i}", data=ba)
145
+ # A reasonable alternative would be to group bytes and bytearrays by their
146
+ # length, creating fewer overall groups. Benefit depends on how heavily weighted
147
+ # unique lengths are to shared lengths, so for now keep it simple
140
148
  array_group = self.file.create_group("ndarrays")
141
149
  for i, ra in enumerate(self._packed[8]):
142
150
  array_group.create_dataset(f"i{i}", data=ra)
143
151
  # Empty doesn't need to be packed -- it's always None so the meta info is enough
144
152
  # Groups don't need to be packed -- they are just holders so meta info is enough
145
153
 
154
+ # Some string encoding requires special treatment to play with h5py
155
+ surrogate_group = self.file.create_group("surrogate_strs")
156
+ for i, s in enumerate(self._packed[9]):
157
+ surrogate_group.create_dataset(f"i{i}", data=np.void(s))
158
+
146
159
  self.close()
147
160
 
148
161
  def _unpack_bag_info(self) -> BagInfo:
@@ -235,9 +248,21 @@ class TrieH5Bag(Bag, HasH5FileContext, ArrayPacker):
235
248
  self._pack_trie(path, type_index, len(group) - 1)
236
249
 
237
250
  def pack_string(self, obj: str, path: str) -> None:
238
- self._pack_thing(obj, "str", path)
251
+ if has_surrogates(obj):
252
+ encoded = obj.encode("utf-16", errors="surrogatepass")
253
+ type_index = self._index_map["surrogate_str"]
254
+ group = self._packed[9]
255
+ group.append(encoded)
256
+ self._pack_trie(path, type_index, len(group) - 1)
257
+ else:
258
+ self._pack_thing(obj, "str", path)
239
259
 
240
260
  def unpack_string(self, path: str) -> str:
261
+ type_index, position_index = self._read_trie(path)
262
+ if self._index_map.inverse[type_index] == "surrogate_str":
263
+ with self:
264
+ data = self.file[f"surrogate_strs/i{position_index}"][()]
265
+ return cast(str, data.tobytes().decode("utf-16", errors="surrogatepass"))
241
266
  return self.maybe_decode(cast(str, self._read_pathlike(path)))
242
267
 
243
268
  def pack_bool(self, obj: bool, path: str) -> None:
@@ -277,16 +302,27 @@ class TrieH5Bag(Bag, HasH5FileContext, ArrayPacker):
277
302
  return value
278
303
 
279
304
  def pack_bytes(self, obj: bytes, path: str) -> None:
280
- self._pack_thing(obj, "bytes", path)
305
+ if obj == b"":
306
+ self._pack_trie(path, self._index_map["empty_bytes"], -1)
307
+ else:
308
+ self._pack_thing(obj, "bytes", path)
281
309
 
282
310
  def unpack_bytes(self, path: str) -> bytes:
283
- return cast(bytes, self._read_pathlike(path).tobytes())
311
+ type_index, position_index = self._read_trie(path)
312
+ if self._index_map.inverse[type_index] == "empty_bytes":
313
+ return b""
314
+ else:
315
+ with self:
316
+ return bytes(self.file[f"bytes_data/i{position_index}"][()])
284
317
 
285
318
  def pack_bytearray(self, obj: bytearray, path: str) -> None:
286
319
  self._pack_thing(obj, "bytearray", path)
287
320
 
288
321
  def unpack_bytearray(self, path: str) -> bytearray:
289
- return bytearray(self._read_pathlike(path))
322
+ _, position_index = self._read_trie(path)
323
+ with self:
324
+ value = bytearray(self.file[f"bytearrays/i{position_index}"][()])
325
+ return value
290
326
 
291
327
  def create_group(self, path: str) -> None:
292
328
  self._pack_trie(path, self._index_map["group"], -1)
@@ -308,7 +344,7 @@ class TrieH5Bag(Bag, HasH5FileContext, ArrayPacker):
308
344
  def get_bespoke_content_class(
309
345
  self, obj: object
310
346
  ) -> type[BespokeItem[Any, Self]] | None:
311
- if type(obj) is np.ndarray and obj.dtype in H5PY_DTYPE_WHITELIST:
347
+ if type(obj) is np.ndarray and obj.dtype.type in H5PY_DTYPE_WHITELIST:
312
348
  return cast(type[BespokeItem[Any, Self]], Array)
313
349
  return None
314
350
 
bagofholding-0.1.6.dist-info/METADATA → bagofholding-0.1.8.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: bagofholding
3
- Version: 0.1.6
3
+ Version: 0.1.8
4
4
  Summary: bagofholding - browsable, partially-reloadable serialization for pickleable python objects.
5
5
  Project-URL: Homepage, https://pyiron.org/
6
6
  Project-URL: Documentation, https://bagofholding.readthedocs.io
@@ -48,9 +48,11 @@ Classifier: Topic :: Scientific/Engineering
48
48
  Requires-Python: <3.15,>=3.11
49
49
  Requires-Dist: bidict==0.23.1
50
50
  Requires-Dist: h5py<3.16.0,>=3.12.1
51
- Requires-Dist: numpy<2.4.0,>=1.26.4
51
+ Requires-Dist: numpy<2.5.0,>=1.26.4
52
52
  Requires-Dist: pygtrie<2.6.0,>=2.5.0
53
53
  Requires-Dist: pyiron-snippets<2.0.0,>=1.0.0
54
+ Provides-Extra: dev
55
+ Requires-Dist: hypothesis==6.151.5; extra == 'dev'
54
56
  Provides-Extra: widget
55
57
  Requires-Dist: ipytree==0.2.2; extra == 'widget'
56
58
  Requires-Dist: traitlets==5.14.3; extra == 'widget'
bagofholding-0.1.6.dist-info/RECORD → bagofholding-0.1.8.dist-info/RECORD CHANGED
@@ -1,18 +1,18 @@
1
1
  bagofholding/__init__.py,sha256=a8YLAb8TmNVGW-zwWOGBk1iUA4vJLYp92TklXCjV5sw,1314
2
- bagofholding/_version.py,sha256=riGXiVTWXmtdoju9hVCWvTxpszEMAAIK0sZZWoLKlnU,704
2
+ bagofholding/_version.py,sha256=Zaz3s9gl_rzsS46-ymJOALojMxviW77EJq_agE8knLk,704
3
3
  bagofholding/bag.py,sha256=i4ves0M_nMR0e7WT-KQQQZOdb29LEQwc0PmXwsBtSuU,8642
4
- bagofholding/content.py,sha256=zezO4YDEHnuRqElFPxJkewBStQqu5A3AHogjV3blXT4,26337
4
+ bagofholding/content.py,sha256=0eN55ytBL-1YdYnGpuXpLqqS0m8_1qbPOO4VEEEtU5s,26617
5
5
  bagofholding/exceptions.py,sha256=e6Vwc6ok7xXvdFy3k2hE7LytiVn1w0PaHH1Cwv06wu0,939
6
6
  bagofholding/metadata.py,sha256=F7Yv6RB1p_BbW3dKSodx7AcMoUWUFX8zWd9iz6owwMg,6866
7
7
  bagofholding/trie.py,sha256=rAWxR8hITeGZimSsAWp-_A2ymLNbBFLJXEtC_en__S8,6083
8
8
  bagofholding/widget.py,sha256=DCsSKxZ855lt5dgoxhT4_YoHbyqygO7D2rJxPaKtyWg,3608
9
9
  bagofholding/h5/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- bagofholding/h5/bag.py,sha256=Rt-L8vTr1acwBg3_9A6xfkOYlIWXXh3-Lf4m_TwR-f8,5801
10
+ bagofholding/h5/bag.py,sha256=XEfGlAkKmE7vdRy_ImOApIMAovPw3f2s6mxh7Av8OTw,6445
11
11
  bagofholding/h5/content.py,sha256=xAHfeMsm6c8ZgQ0RvkFrF1ooDA3BhMPSx9e6MfG8FLM,970
12
12
  bagofholding/h5/context.py,sha256=JT70idJPUF0jEtIpZObTrQ8Xcf595dFSEGyss7dYKR8,1613
13
- bagofholding/h5/dtypes.py,sha256=7WKR0U-RsM2IAMPWnxDFCKgg9WLabD0j9Py72-fF8MM,1206
14
- bagofholding/h5/triebag.py,sha256=fyYKweuMLms3wSDFB7_DTVsoMv9I1y8kLwVxxztQcYk,12119
15
- bagofholding-0.1.6.dist-info/METADATA,sha256=nkMjQdLiLzZDt9pNjkmB01FJ8kgU65AnJNW8pvnHuUI,10014
16
- bagofholding-0.1.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
17
- bagofholding-0.1.6.dist-info/licenses/LICENSE,sha256=bNZX-3pFSnVGhRfng8SbI2FgEKLZb2nGO5jdsbwjEtk,1542
18
- bagofholding-0.1.6.dist-info/RECORD,,
13
+ bagofholding/h5/dtypes.py,sha256=trcwpQfShiarcN2Fg5HI2p18vakfUH74mms0_CeyzLc,1269
14
+ bagofholding/h5/triebag.py,sha256=ck8hCsupiJTYj9GUWTTaadivenst1XfM0i0bLxF14TY,13932
15
+ bagofholding-0.1.8.dist-info/METADATA,sha256=dsTzNb_rQVvjnvUSV5ZTB7tIQvnAo88bsQAU8x_6HPk,10085
16
+ bagofholding-0.1.8.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
17
+ bagofholding-0.1.8.dist-info/licenses/LICENSE,sha256=bNZX-3pFSnVGhRfng8SbI2FgEKLZb2nGO5jdsbwjEtk,1542
18
+ bagofholding-0.1.8.dist-info/RECORD,,
bagofholding-0.1.6.dist-info/WHEEL → bagofholding-0.1.8.dist-info/WHEEL CHANGED
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.27.0
2
+ Generator: hatchling 1.28.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any