legend-pydataobj 1.12.0a4__py3-none-any.whl → 1.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {legend_pydataobj-1.12.0a4.dist-info → legend_pydataobj-1.14.0.dist-info}/METADATA +1 -1
- {legend_pydataobj-1.12.0a4.dist-info → legend_pydataobj-1.14.0.dist-info}/RECORD +26 -25
- {legend_pydataobj-1.12.0a4.dist-info → legend_pydataobj-1.14.0.dist-info}/WHEEL +1 -1
- lgdo/_version.py +2 -2
- lgdo/compression/radware.py +3 -3
- lgdo/compression/varlen.py +2 -2
- lgdo/lh5/__init__.py +2 -2
- lgdo/lh5/_serializers/read/ndarray.py +5 -5
- lgdo/lh5/_serializers/write/array.py +3 -4
- lgdo/lh5/_serializers/write/composite.py +94 -14
- lgdo/lh5/core.py +19 -11
- lgdo/lh5/datatype.py +5 -1
- lgdo/lh5/iterator.py +56 -43
- lgdo/lh5/settings.py +34 -0
- lgdo/lh5/store.py +14 -8
- lgdo/lh5/tools.py +7 -6
- lgdo/lh5/utils.py +18 -10
- lgdo/types/array.py +5 -2
- lgdo/types/encoded.py +6 -0
- lgdo/types/scalar.py +3 -0
- lgdo/types/struct.py +52 -2
- lgdo/types/table.py +11 -6
- lgdo/types/vectorofvectors.py +3 -0
- {legend_pydataobj-1.12.0a4.dist-info → legend_pydataobj-1.14.0.dist-info}/entry_points.txt +0 -0
- {legend_pydataobj-1.12.0a4.dist-info → legend_pydataobj-1.14.0.dist-info}/licenses/LICENSE +0 -0
- {legend_pydataobj-1.12.0a4.dist-info → legend_pydataobj-1.14.0.dist-info}/top_level.txt +0 -0
{legend_pydataobj-1.12.0a4.dist-info → legend_pydataobj-1.14.0.dist-info}/RECORD
CHANGED
```diff
@@ -1,6 +1,6 @@
-legend_pydataobj-1.
+legend_pydataobj-1.14.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
 lgdo/__init__.py,sha256=fkRv79kdtBasw31gPVK9SdLQ2vEEajTV2t3UPDvFg9o,3206
-lgdo/_version.py,sha256=
+lgdo/_version.py,sha256=zEosD-3Sqrti57GKf-4yC-NurX2Smyv5d6IDkQisUBo,513
 lgdo/cli.py,sha256=s_EWTBWW76l7zWb6gaTSTjiT-0RzzcYEmjeFEQCVxfk,4647
 lgdo/lgdo_utils.py,sha256=6a2YWEwpyEMXlAyTHZMO01aqxy6SxJzPZkGNWKNWuS0,2567
 lgdo/logging.py,sha256=82wIOj7l7xr3WYyeHdpSXbbjzHJsy-uRyKYUYx2vMfQ,1003
@@ -9,47 +9,48 @@ lgdo/utils.py,sha256=WRTmXnaQ-h2hVxwJ27qiOigdsD3DHcaDrdDjvupCuZU,3940
 lgdo/compression/__init__.py,sha256=xHt_8Th0LxxNwj9iYHf5uGNTm3A_4qyW7zEVdAX3NwI,1127
 lgdo/compression/base.py,sha256=82cQJujfvoAOKBFx761dEcx_xM02TBCBBuBo6i78tuI,838
 lgdo/compression/generic.py,sha256=tF3UhLJbUDcovLxpIzgQRxFSjZ5Fz3uDRy9kI4mFntQ,2515
-lgdo/compression/radware.py,sha256
+lgdo/compression/radware.py,sha256=-W7LgvkSVzdVJ6qNn7Ts3O9EcRcl8mUiApTLqR4dtIo,23836
 lgdo/compression/utils.py,sha256=W2RkBrxPpXlat84dnU9Ad7d_tTws0irtGl7O1dNWjnk,1140
-lgdo/compression/varlen.py,sha256=
-lgdo/lh5/__init__.py,sha256=
+lgdo/compression/varlen.py,sha256=bjyxhHzfpi6PIPy-Uc47W8_LrRbFoJLJ2kVeD5nhyqo,15125
+lgdo/lh5/__init__.py,sha256=smHTawINIiogHNfYJq3aPvtxleTnBMdPADRCdc1wea8,748
 lgdo/lh5/concat.py,sha256=BZCgK7TWPKK8fMmha8K83d3bC31FVO1b5LOW7x-Ru1s,6186
-lgdo/lh5/core.py,sha256=
-lgdo/lh5/datatype.py,sha256=
+lgdo/lh5/core.py,sha256=tbvitu3Pr-FCF4nOopVxGVOobDhGaVWo4o0HS58TGtY,13806
+lgdo/lh5/datatype.py,sha256=ry3twFaosuBoskiTKqtBYRMk9PQAf403593xKaItfog,1827
 lgdo/lh5/exceptions.py,sha256=3kj8avXl4eBGvebl3LG12gJEmw91W0T8PYR0AfvUAyM,1211
-lgdo/lh5/iterator.py,sha256=
-lgdo/lh5/
-lgdo/lh5/
-lgdo/lh5/
+lgdo/lh5/iterator.py,sha256=vuN98pa-xHDWXM2GMxvMxFEJGfHatMX6ajqnaP55PuY,20680
+lgdo/lh5/settings.py,sha256=cmPd6ZvneAF5sFMA1qf-9g_YSSygJcQSRmZDp1_sBEU,1001
+lgdo/lh5/store.py,sha256=HJuDjWQ8ztrKDoyWW3cplhtWDnz3J4a-Fe2WF4fzOY4,8676
+lgdo/lh5/tools.py,sha256=EZTCj3TMMp4Rnocq1F0QeO1yYHzx4yMR7l_Em4G7sC4,6503
+lgdo/lh5/utils.py,sha256=hxPoaG25MOhuu7emrw2xzx3zerl-GzeMWdlfoQmLiYo,6667
 lgdo/lh5/_serializers/__init__.py,sha256=eZzxMp1SeZWG0PkEXUiCz3XyprQ8EmelHUmJogC8xYE,1263
 lgdo/lh5/_serializers/read/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lgdo/lh5/_serializers/read/array.py,sha256=uWfMCihfAmW2DE2ewip2qCK_kvQC_mb2zvOv26uzijc,1000
 lgdo/lh5/_serializers/read/composite.py,sha256=UvkZHEhf0V7SFLxzF52eyP68hU0guGOLqosrfmIfeys,11729
 lgdo/lh5/_serializers/read/encoded.py,sha256=Q98c08d8LkZq2AlY4rThYECVaEqwbv4T2Urn7TGnsyE,4130
-lgdo/lh5/_serializers/read/ndarray.py,sha256=
+lgdo/lh5/_serializers/read/ndarray.py,sha256=cxzZ7esT5BzxyoXfITBG_EDTtCVxSeSu6dVZrohOdOY,3685
 lgdo/lh5/_serializers/read/scalar.py,sha256=kwhWm1T91pXf86CqtUUD8_qheSR92gXZrQVtssV5YCg,922
 lgdo/lh5/_serializers/read/utils.py,sha256=YfSqPO-83A1XvhhuULxQ0Qz2A5ODa3sb7ApNxQVJXd0,7581
 lgdo/lh5/_serializers/read/vector_of_vectors.py,sha256=765P8mElGArAaEPkHTAUXFQ47t1_3-3BQAete0LckBQ,7207
 lgdo/lh5/_serializers/write/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lgdo/lh5/_serializers/write/array.py,sha256=
-lgdo/lh5/_serializers/write/composite.py,sha256=
+lgdo/lh5/_serializers/write/array.py,sha256=qzRNPQ4mtvc7HYPE3vUcM6bi7lWYnolNStdJVcDfzPU,3174
+lgdo/lh5/_serializers/write/composite.py,sha256=sZfV8aGZCH0mvMZ2dGDKt-MoepgL4PlR9ZWbT_JNIjQ,12171
 lgdo/lh5/_serializers/write/scalar.py,sha256=JPt_fcdTKOSFp5hfJdcKIfK4hxhcD8vhOlvDF-7btQ8,763
 lgdo/lh5/_serializers/write/vector_of_vectors.py,sha256=puGQX9XF5P_5DVbm_Cc6TvPrsDywgBLSYtkqFNltbB4,3493
 lgdo/types/__init__.py,sha256=DNfOErPiAZg-7Gygkp6ZKAi20Yrm1mfderZHvKo1Y4s,821
-lgdo/types/array.py,sha256=
+lgdo/types/array.py,sha256=TpZINHgGIptslwr5mwKYWU_PrYAk8bH1ECJ4XfLkWxg,9338
 lgdo/types/arrayofequalsizedarrays.py,sha256=DOGJiTmc1QCdm7vLbE6uIRXoMPtt8uuCfmwQawgWf5s,4949
-lgdo/types/encoded.py,sha256=
+lgdo/types/encoded.py,sha256=8DJHb3kxz6RrmjkeLWS6iyjvIJqx86mDInWqqjpMON0,15752
 lgdo/types/fixedsizearray.py,sha256=7RjUwTz1bW0pcrdy27JlfrXPAuOU89Kj7pOuSUCojK8,1527
 lgdo/types/histogram.py,sha256=Jz1lLH56BfYnmcUhxUHK1h2wLDQ0Abgyd-6LznU-3-k,19979
 lgdo/types/lgdo.py,sha256=21YNtJCHnSO3M60rjsAdbMO5crDjL_0BtuFpudZ2xvU,4500
-lgdo/types/scalar.py,sha256=
-lgdo/types/struct.py,sha256=
-lgdo/types/table.py,sha256=
-lgdo/types/vectorofvectors.py,sha256=
+lgdo/types/scalar.py,sha256=nBPiqX4g3GrPavEbG6nCt2Jel7Mj0IchXqwxB6ei_rg,1989
+lgdo/types/struct.py,sha256=m3pYfGfKptV8ti3wb4n1nsPKMvhjdWCFoRdR5YooZBM,6353
+lgdo/types/table.py,sha256=huhgpzdAUx0bRaEaitwnb-Ve7oAu5B6zxPK5EXPUfg0,20233
+lgdo/types/vectorofvectors.py,sha256=k1LwNnX3TcRAhOujj85kNkfZN0MXZYL9aaMUbr82JlE,26910
 lgdo/types/vovutils.py,sha256=LW3ZcwECxVYxxcFadAtY3nnK-9-rk8Xbg_m8hY30lo4,10708
 lgdo/types/waveformtable.py,sha256=9S_NMg894NZTGt2pLuskwH4-zQ5EbLnzWI6FVui6fXE,9827
-legend_pydataobj-1.
-legend_pydataobj-1.
-legend_pydataobj-1.
-legend_pydataobj-1.
-legend_pydataobj-1.
+legend_pydataobj-1.14.0.dist-info/METADATA,sha256=JaH2muAaB5Otjd9XhqiFfrgqtf9mR6F4XbIBPlZmB0g,44443
+legend_pydataobj-1.14.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+legend_pydataobj-1.14.0.dist-info/entry_points.txt,sha256=0KWfnwbuwhNn0vPUqARukjp04Ca6lzfZBSirouRmk7I,76
+legend_pydataobj-1.14.0.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
+legend_pydataobj-1.14.0.dist-info/RECORD,,
```
lgdo/_version.py
CHANGED
```diff
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '1.
-__version_tuple__ = version_tuple = (1,
+__version__ = version = '1.14.0'
+__version_tuple__ = version_tuple = (1, 14, 0)
```
lgdo/compression/radware.py
CHANGED
```diff
@@ -95,13 +95,13 @@ def encode(
     if isinstance(sig_in, np.ndarray):
         s = sig_in.shape
         if len(sig_in) == 0:
-            return np.empty(s[:-1]
+            return np.empty((*s[:-1], 0), dtype=ubyte), np.empty(0, dtype=uint32)
 
         if sig_out is None:
             # the encoded signal is an array of bytes
             # -> twice as long as a uint16
             # pre-allocate ubyte (uint8) array, expand last dimension
-            sig_out = np.empty(s[:-1]
+            sig_out = np.empty((*s[:-1], s[-1] * 2), dtype=ubyte)
 
         if sig_out.dtype != ubyte:
             msg = "sig_out must be of type ubyte"
@@ -226,7 +226,7 @@ def decode(
     # allocate output array with lasd dim as large as the longest
     # uncompressed wf
     maxs = np.max(_get_hton_u16(sig_in[0], 0))
-    sig_out = np.empty(s[:-1]
+    sig_out = np.empty((*s[:-1], maxs), dtype=int32)
 
     # siglen has one dimension less (the last)
     siglen = np.empty(s[:-1], dtype=uint32)
```
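Both hunks make the same mechanical change: the expanded output shape is now built with PEP 448 iterable unpacking, `(*s[:-1], n)`, instead of the tuple concatenation the old (truncated) lines used. A minimal sketch of the pattern; the waveform shape below is illustrative, not taken from LEGEND data:

```python
import numpy as np

s = (4, 1000)  # e.g. 4 waveforms of 1000 uint16 samples each

# tuple concatenation (old style) and unpacking (new style) agree
assert s[:-1] + (s[-1] * 2,) == (*s[:-1], s[-1] * 2)

# pre-allocate the encoded byte buffer exactly as encode() does:
# last dimension doubled, since each uint16 becomes two bytes
sig_out = np.empty((*s[:-1], s[-1] * 2), dtype=np.ubyte)
print(sig_out.shape)  # (4, 2000)
```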
lgdo/compression/varlen.py
CHANGED
```diff
@@ -74,14 +74,14 @@ def encode(
     if isinstance(sig_in, np.ndarray):
         s = sig_in.shape
         if len(sig_in) == 0:
-            return np.empty(s[:-1]
+            return np.empty((*s[:-1], 0), dtype=ubyte), np.empty(0, dtype=uint32)
 
         if sig_out is None:
             # the encoded signal is an array of bytes
             # pre-allocate ubyte (uint8) array with a generous (but safe) size
             max_b = int(np.ceil(np.iinfo(sig_in.dtype).bits / 16) * 5)
             # expand last dimension
-            sig_out = np.empty(s[:-1]
+            sig_out = np.empty((*s[:-1], s[-1] * max_b), dtype=ubyte)
 
         if sig_out.dtype != ubyte:
             msg = "sig_out must be of type ubyte"
```
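The `max_b` factor visible in the context lines sizes the output buffer for the worst case of the variable-length encoding, at most 5 bytes per 16-bit group. Checking the arithmetic for a few input dtypes:

```python
# Worst-case bytes per sample, computed as in varlen.encode();
# the dtypes are just examples.
import numpy as np

for dtype in (np.uint16, np.uint32, np.uint64):
    bits = np.iinfo(dtype).bits
    max_b = int(np.ceil(bits / 16) * 5)
    print(np.dtype(dtype).name, "->", max_b)
# uint16 -> 5, uint32 -> 10, uint64 -> 20
```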
lgdo/lh5/__init__.py
CHANGED
```diff
@@ -7,7 +7,6 @@ browsed easily in python like any `HDF5 <https://www.hdfgroup.org>`_ file using
 
 from __future__ import annotations
 
-from ._serializers.write.array import DEFAULT_HDF5_SETTINGS
 from .core import read, read_as, write
 from .iterator import LH5Iterator
 from .store import LH5Store
@@ -15,14 +14,15 @@ from .tools import ls, show
 from .utils import read_n_rows
 
 __all__ = [
-    "DEFAULT_HDF5_SETTINGS",
     "LH5Iterator",
     "LH5Store",
     "concat",
+    "default_hdf5_settings",
     "ls",
     "read",
     "read_as",
     "read_n_rows",
+    "reset_default_hdf5_settings",
     "show",
     "write",
 ]
```
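`DEFAULT_HDF5_SETTINGS` is thus no longer re-exported from the private `_serializers` package; the defaults now live in the new `lgdo.lh5.settings` module (added later in this diff). A hedged migration sketch, assuming only what this diff shows:

```python
# Old (pre-1.14.0) import, now removed:
#   from lgdo.lh5 import DEFAULT_HDF5_SETTINGS
# New home of the defaults:
from lgdo.lh5 import settings

settings.DEFAULT_HDF5_SETTINGS["compression"] = "lzf"  # affects later writes
# restore the package defaults (shuffle + gzip):
settings.DEFAULT_HDF5_SETTINGS = settings.default_hdf5_settings()
```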
lgdo/lh5/_serializers/read/ndarray.py
CHANGED
```diff
@@ -57,7 +57,7 @@ def _h5_read_ndarray(
             (start_row,) + (0,) * (h5d.rank - 1),
             (1,) * h5d.rank,
             None,
-            (n_rows_to_read,
+            (n_rows_to_read, *fspace.shape[1:]),
         )
     elif use_h5idx:
         # Note that h5s will automatically merge adjacent elements into a range
@@ -67,7 +67,7 @@ def _h5_read_ndarray(
                 (i,) + (0,) * (h5d.rank - 1),
                 (1,) * h5d.rank,
                 None,
-                (1,
+                (1, *fspace.shape[1:]),
                 h5py.h5s.SELECT_OR,
             )
 
@@ -84,7 +84,7 @@ def _h5_read_ndarray(
                 (obj_buf_start,) + (0,) * (h5d.rank - 1),
                 (1,) * h5d.rank,
                 None,
-                (n_rows_to_read,
+                (n_rows_to_read, *fspace.shape[1:]),
             )
             h5d.read(mspace, fspace, obj_buf.nda)
         else:
@@ -93,10 +93,10 @@ def _h5_read_ndarray(
             obj_buf.nda[dest_sel, ...] = tmp[idx, ...]
         nda = obj_buf.nda
     elif n_rows == 0:
-        tmp_shape = (0,
+        tmp_shape = (0, *h5d.shape[1:])
         nda = np.empty(tmp_shape, h5d.dtype)
     else:
-        mspace = h5py.h5s.create_simple((n_rows_to_read,
+        mspace = h5py.h5s.create_simple((n_rows_to_read, *fspace.shape[1:]))
         nda = np.empty(mspace.shape, h5d.dtype)
         if idx is None or use_h5idx:
             h5d.read(mspace, fspace, nda)
```
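All four hunks only touch the shape tuples passed to the low-level HDF5 selection calls; semantics are unchanged. Conceptually each selection grabs a block of rows while keeping the trailing dimensions whole, i.e. the low-level counterpart of ordinary h5py slicing. A sketch; the file and dataset names are made up:

```python
import h5py

# high-level equivalent of the hyperslab built in _h5_read_ndarray()
with h5py.File("example.lh5", "r") as f:  # hypothetical file
    dset = f["geds/raw/waveform/values"]  # hypothetical dataset
    start_row, n_rows_to_read = 10, 5
    nda = dset[start_row : start_row + n_rows_to_read, ...]
    assert nda.shape == (n_rows_to_read, *dset.shape[1:])
```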
lgdo/lh5/_serializers/write/array.py
CHANGED
```diff
@@ -6,12 +6,11 @@ import h5py
 import numpy as np
 
 from .... import types
+from ... import settings
 from ...exceptions import LH5EncodeError
 
 log = logging.getLogger(__name__)
 
-DEFAULT_HDF5_SETTINGS: dict[str, ...] = {"shuffle": True, "compression": "gzip"}
-
 
 def _h5_write_array(
     obj,
@@ -41,7 +40,7 @@ def _h5_write_array(
         # this is needed in order to have a resizable (in the first
         # axis) data set, i.e. rows can be appended later
         # NOTE: this automatically turns chunking on!
-        maxshape = (None,
+        maxshape = (None, *nda.shape[1:])
         h5py_kwargs.setdefault("maxshape", maxshape)
 
     if wo_mode == "o" and name in group:
@@ -49,7 +48,7 @@ def _h5_write_array(
         del group[name]
 
     # set default compression options
-    for k, v in DEFAULT_HDF5_SETTINGS.items():
+    for k, v in settings.DEFAULT_HDF5_SETTINGS.items():
         h5py_kwargs.setdefault(k, v)
 
     # compress using the 'compression' LGDO attribute, if available
```
lgdo/lh5/_serializers/write/composite.py
CHANGED
```diff
@@ -64,6 +64,59 @@ def _h5_write_lgdo(
 
     group = utils.get_h5_group(group, lh5_file)
 
+    # name already in file
+    if name in group or (
+        ("datatype" in group.attrs or group == "/")
+        and (len(name) <= 2 or "/" not in name[1:-1])
+    ):
+        pass
+    # group is in file but not struct or need to create nesting
+    else:
+        # check if name is nested
+        # if name is nested, iterate up from parent
+        # otherwise we just need to iterate the group
+        if len(name) > 2 and "/" in name[1:-1]:
+            group = utils.get_h5_group(
+                name[:-1].rsplit("/", 1)[0],
+                group,
+            )
+            curr_name = (
+                name.rsplit("/", 1)[1]
+                if name[-1] != "/"
+                else name[:-1].rsplit("/", 1)[1]
+            )
+        else:
+            curr_name = name
+        # initialize the object to be written
+        obj = types.Struct({curr_name.replace("/", ""): obj})
+
+        # if base group already has a child we just append
+        if len(group) >= 1:
+            wo_mode = "ac"
+        else:
+            # iterate up the group hierarchy until we reach the root or a group with more than one child
+            while group.name != "/":
+                if len(group) > 1:
+                    break
+                curr_name = group.name
+                group = group.parent
+                if group.name != "/":
+                    obj = types.Struct({curr_name[len(group.name) + 1 :]: obj})
+                else:
+                    obj = types.Struct({curr_name[1:]: obj})
+            # if the group has more than one child, we need to append else we can overwrite
+            wo_mode = "ac" if len(group) > 1 else "o"
+
+        # set the new name
+        if group.name == "/":
+            name = "/"
+        elif group.parent.name == "/":
+            name = group.name[1:]
+        else:
+            name = group.name[len(group.parent.name) + 1 :]
+        # get the new group
+        group = utils.get_h5_group(group.parent if group.name != "/" else "/", lh5_file)
+
     if wo_mode == "w" and name in group:
         msg = f"can't overwrite '{name}' in wo_mode 'write_safe'"
         raise LH5EncodeError(msg, lh5_file, group, name)
@@ -87,7 +140,7 @@ def _h5_write_lgdo(
         lh5_file,
         group=group,
         start_row=start_row,
-        n_rows=n_rows,
+        n_rows=n_rows,  # if isinstance(obj, types.Table | types.Histogram) else None,
         wo_mode=wo_mode,
         write_start=write_start,
         **h5py_kwargs,
@@ -186,19 +239,31 @@ def _h5_write_struct(
     write_start=0,
     **h5py_kwargs,
 ):
+    # this works for structs and derived (tables)
     assert isinstance(obj, types.Struct)
 
     # In order to append a column, we need to update the
-    # `table{old_fields}` value in `group.attrs['datatype"]` to include
-    # the new fields.
-    #
-    #
+    # `struct/table{old_fields}` value in `group.attrs['datatype"]` to include
+    # the new fields. One way to do this is to override `obj.attrs["datatype"]`
+    # to include old and new fields. Then we can write the fields to the
+    # struct/table as normal.
     if wo_mode == "ac":
+        if name not in group:
+            msg = "Cannot append column to non-existing struct on disk"
+            raise LH5EncodeError(msg, lh5_file, group, name)
+
         old_group = utils.get_h5_group(name, group)
+        if "datatype" not in old_group.attrs:
+            msg = "Cannot append column to an existing non-LGDO object on disk"
+            raise LH5EncodeError(msg, lh5_file, group, name)
+
         lgdotype = datatype.datatype(old_group.attrs["datatype"])
         fields = datatype.get_struct_fields(old_group.attrs["datatype"])
-        if not
-            msg =
+        if lgdotype is not type(obj):
+            msg = (
+                "Trying to append columns to an object of different "
+                f"type {lgdotype.__name__}!={type(obj)}"
+            )
             raise LH5EncodeError(msg, lh5_file, group, name)
 
     # If the mode is `append_column`, make sure we aren't appending
@@ -211,8 +276,14 @@ def _h5_write_struct(
                 "column(s) to a table with the same field(s)"
             )
             raise LH5EncodeError(msg, lh5_file, group, name)
+
         # It doesn't matter what key we access, as all fields in the old table have the same size
-        if
+        if (
+            isinstance(obj, types.Table)
+            and old_group.attrs["datatype"][:6]
+            != "struct"  # structs dont care about size
+            and old_group[next(iter(old_group.keys()))].size != obj.size
+        ):
             msg = (
                 f"Table sizes don't match. Trying to append column of size {obj.size} "
                 f"to a table of size {old_group[next(iter(old_group.keys()))].size}."
@@ -222,16 +293,27 @@ def _h5_write_struct(
         # Now we can append the obj.keys() to the old fields, and then update obj.attrs.
         fields.extend(list(obj.keys()))
         obj.attrs.pop("datatype")
-
+
+        obj.attrs["datatype"] = (
+            obj.datatype_name() + "{" + ",".join(sorted(fields)) + "}"
+        )
+
+        # propagating wo_mode="ac" to nested LGDOs does not make any sense
+        wo_mode = "append"
+
+        # overwrite attributes of the existing struct
+        attrs_overwrite = True
+    else:
+        attrs_overwrite = wo_mode == "o"
 
     group = utils.get_h5_group(
         name,
         group,
         grp_attrs=obj.attrs,
-        overwrite=
+        overwrite=attrs_overwrite,
     )
     # If the mode is overwrite, then we need to peek into the file's
-    # table's existing fields.
+    # table's existing fields. If we are writing a new table to the
     # group that does not contain an old field, we should delete that
     # old field from the file
     if wo_mode == "o":
@@ -260,11 +342,9 @@ def _h5_write_struct(
         else:
             obj_fld = obj[field]
 
-        # Convert keys to string for dataset names
-        f = str(field)
         _h5_write_lgdo(
             obj_fld,
-
+            str(field),
             lh5_file,
             group=group,
             start_row=start_row,
```
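The large new block in `_h5_write_lgdo` makes writes to nested, not-yet-existing paths work by wrapping the object in one `Struct` per missing level, so every intermediate HDF5 group carries a valid `datatype` attribute. A simplified standalone sketch of the wrapping idea; `wrap_nested` is a hypothetical helper, not lgdo API, and the real code additionally walks existing groups and adjusts `wo_mode`:

```python
import numpy as np

from lgdo.types import Array, Struct


def wrap_nested(obj, path: str):
    """Wrap obj in one Struct per path component, innermost first."""
    parts = [p for p in path.strip("/").split("/") if p]
    for part in reversed(parts[1:]):
        obj = Struct({part: obj})
    # the remaining top-level name is what actually gets written
    return parts[0], obj


name, wrapped = wrap_nested(Array(np.array([1, 2, 3])), "det0/sub/data")
print(name)     # det0
print(wrapped)  # roughly Struct({'sub': Struct({'data': Array([1 2 3])})})
```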
lgdo/lh5/core.py
CHANGED
```diff
@@ -5,6 +5,7 @@ import inspect
 import sys
 from collections.abc import Mapping, Sequence
 from contextlib import suppress
+from pathlib import Path
 from typing import Any
 
 import h5py
@@ -18,7 +19,7 @@ from .utils import read_n_rows
 
 def read(
     name: str,
-    lh5_file: str | h5py.File | Sequence[str | h5py.File],
+    lh5_file: str | Path | h5py.File | Sequence[str | Path | h5py.File],
     start_row: int = 0,
     n_rows: int = sys.maxsize,
     idx: ArrayLike = None,
@@ -111,8 +112,8 @@ def read(
     """
     if isinstance(lh5_file, h5py.File):
        lh5_obj = lh5_file[name]
-    elif isinstance(lh5_file, str):
-        lh5_file = h5py.File(lh5_file, mode="r", locking=locking)
+    elif isinstance(lh5_file, (str, Path)):
+        lh5_file = h5py.File(str(Path(lh5_file)), mode="r", locking=locking)
         try:
             lh5_obj = lh5_file[name]
         except KeyError as ke:
@@ -194,7 +195,7 @@ def read(
 def write(
     obj: types.LGDO,
     name: str,
-    lh5_file: str | h5py.File,
+    lh5_file: str | Path | h5py.File,
     group: str | h5py.Group = "/",
     start_row: int = 0,
     n_rows: int | None = None,
@@ -268,11 +269,13 @@ def write(
           end of array is the same as ``append``.
         - ``overwrite_file`` or ``of``: delete file if present prior to
           writing to it. `write_start` should be 0 (its ignored).
-        - ``append_column`` or ``ac``: append columns from an
-          :class:`~.lgdo.
-          :class:`~.lgdo.table.Table`
-
-
+        - ``append_column`` or ``ac``: append fields/columns from an
+          :class:`~.lgdo.struct.Struct` `obj` (and derived types such as
+          :class:`~.lgdo.table.Table`) only if there is an existing
+          :class:`~.lgdo.struct.Struct` in the `lh5_file` with the same `name`.
+          If there are matching fields, it errors out. If appending to a
+          ``Table`` and the size of the new column is different from the size
+          of the existing table, it errors out.
     write_start
         row in the output file (if already existing) to start overwriting
         from.
@@ -288,7 +291,12 @@ def write(
         datasets. **Note: `compression` Ignored if compression is specified
         as an `obj` attribute.**
     """
-
+
+    if (
+        isinstance(lh5_file, str)
+        and not Path(lh5_file).is_file()
+        and wo_mode in ("w", "write_safe", "of", "overwrite_file")
+    ):
         h5py_kwargs.update(
             {
                 "fs_strategy": "page",
@@ -310,7 +318,7 @@ def write(
 
 def read_as(
     name: str,
-    lh5_file: str | h5py.File | Sequence[str | h5py.File],
+    lh5_file: str | Path | h5py.File | Sequence[str | Path | h5py.File],
     library: str,
     **kwargs,
 ) -> Any:
```
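`read`, `write` and `read_as` now accept `pathlib.Path` wherever a filename string was accepted, and brand-new files created in `write_safe`/`overwrite_file` mode get the HDF5 `page` file-space strategy. A usage sketch; file and object names are placeholders:

```python
from pathlib import Path

import numpy as np

from lgdo import lh5
from lgdo.types import Array

out = Path("scratch") / "data.lh5"
out.parent.mkdir(exist_ok=True)

# Path objects are now valid in place of str filenames
lh5.write(Array(np.arange(10)), "numbers", out, wo_mode="of")
print(lh5.read("numbers", out).nda)  # [0 1 2 ... 9]
```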
lgdo/lh5/datatype.py
CHANGED
```diff
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import re
 from collections import OrderedDict
+from itertools import permutations as perm
 
 from .. import types as lgdo
 
@@ -14,7 +15,10 @@ _lgdo_datatype_map: dict[str, lgdo.LGDO] = OrderedDict(
         lgdo.ArrayOfEncodedEqualSizedArrays,
         r"^array_of_encoded_equalsized_arrays<1,1>\{.+\}$",
     ),
-    (
+    (
+        lgdo.Histogram,
+        rf"^struct\{{(?:{'|'.join([','.join(p) for p in perm(['binning', 'weights', 'isdensity'])])})\}}$",
+    ),
     (lgdo.Struct, r"^struct\{.*\}$"),
     (lgdo.Table, r"^table\{.*\}$"),
     (lgdo.FixedSizeArray, r"^fixedsize_array<\d+>\{.+\}$"),
```
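The new `Histogram` entry matches a `struct` whose fields are exactly `binning`, `weights` and `isdensity` in any order, by enumerating all six permutations in a regex alternation (it must come before the generic `Struct` pattern in the ordered map). The generated pattern can be inspected directly:

```python
import re
from itertools import permutations as perm

fields = ["binning", "weights", "isdensity"]
pattern = rf"^struct\{{(?:{'|'.join([','.join(p) for p in perm(fields)])})\}}$"
print(pattern)  # ^struct\{(?:binning,weights,isdensity|...5 more...)\}$

assert re.match(pattern, "struct{weights,binning,isdensity}")
assert not re.match(pattern, "struct{binning,weights}")
```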
lgdo/lh5/iterator.py
CHANGED
```diff
@@ -17,54 +17,50 @@ LGDO = typing.Union[Array, Scalar, Struct, VectorOfVectors]
 
 
 class LH5Iterator(typing.Iterator):
-    """
-    [...]
-    >>> lh5_obj = lh5_it.read(i_entry)
-
-    to read the block of entries starting at i_entry. In case of multiple files
-    or the use of an event selection, i_entry refers to a global event index
-    across files and does not count events that are excluded by the selection.
+    """Iterate over chunks of entries from LH5 files.
+
+    The iterator reads ``buffer_len`` entries at a time from one or more
+    files. The LGDO instance returned at each iteration is reused to avoid
+    reallocations, so copy the data if it should be preserved.
+
+    Examples
+    --------
+    Iterate through a table one chunk at a time::
+
+        from lgdo.lh5 import LH5Iterator
+
+        for table in LH5Iterator("data.lh5", "geds/raw/energy", buffer_len=100):
+            process(table)
+
+    ``LH5Iterator`` can also be used for random access::
+
+        it = LH5Iterator(files, groups)
+        table = it.read(i_entry)
+
+    In case of multiple files or an entry selection, ``i_entry`` refers to the
+    global event index across all files.
+
+    When instantiating an iterator you must provide a list of files and the
+    HDF5 groups to read. Optional parameters allow field masking, event
+    selection and pairing the iterator with a "friend" iterator that is read in
+    parallel. Several properties are available to obtain the provenance of the
+    data currently loaded:
+
+    - ``current_i_entry`` -- index within the entry list of the first entry in
+      the buffer
+    - ``current_local_entries`` -- entry numbers relative to the file the data
+      came from
+    - ``current_global_entries`` -- entry number relative to the full dataset
+    - ``current_files`` -- file name corresponding to each entry in the buffer
+    - ``current_groups`` -- group name corresponding to each entry in the
+      buffer
     """
 
     def __init__(
         self,
         lh5_files: str | list[str],
         groups: str | list[str] | list[list[str]],
+        *,
         base_path: str = "",
         entry_list: list[int] | list[list[int]] | None = None,
         entry_mask: list[bool] | list[list[bool]] | None = None,
@@ -75,6 +71,7 @@ class LH5Iterator(typing.Iterator):
         file_cache: int = 10,
         file_map: NDArray[int] = None,
         friend: typing.Iterator | None = None,
+        h5py_open_mode: str = "r",
     ) -> None:
         """
         Parameters
@@ -115,9 +112,21 @@ class LH5Iterator(typing.Iterator):
             The friend should have the same length and entry list. A single
             LH5 table containing columns from both iterators will be returned.
             Note that buffer_len will be set to the minimum of the two.
+        h5py_open_mode
+            file open mode used when acquiring file handles. ``r`` (default)
+            opens files read-only while ``a`` allow opening files for
+            write-appending as well.
         """
         self.lh5_st = LH5Store(base_path=base_path, keep_open=file_cache)
 
+        if h5py_open_mode == "read":
+            h5py_open_mode = "r"
+        if h5py_open_mode == "append":
+            h5py_open_mode = "a"
+        if h5py_open_mode not in ["r", "a"]:
+            msg = f"unknown h5py_open_mode '{h5py_open_mode}'"
+            raise ValueError(msg)
+
         # List of files, with wildcards and env vars expanded
         if isinstance(lh5_files, str):
             lh5_files = [lh5_files]
@@ -152,6 +161,10 @@ class LH5Iterator(typing.Iterator):
             self.lh5_files += [f_exp] * len(g)
             self.groups += list(g)
 
+        # open files in the requested mode so they are writable if needed
+        for f in set(self.lh5_files):
+            self.lh5_st.gimme_file(f, mode=h5py_open_mode)
+
         if entry_list is not None and entry_mask is not None:
             msg = "entry_list and entry_mask arguments are mutually exclusive"
             raise ValueError(msg)
@@ -505,7 +518,7 @@ class LH5Iterator(typing.Iterator):
         return self
 
     def __next__(self) -> tuple[LGDO, int, int]:
-        """Read next
+        """Read the next chunk of entries and return the buffer."""
         n_entries = self.n_entries
         if n_entries is not None:
             n_entries = min(
```
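Note that everything from `base_path` on is now keyword-only (the new `*,` marker), which is a breaking change for callers that passed those arguments positionally. A usage sketch grounded in the docstring above; file and group names are placeholders:

```python
from lgdo.lh5 import LH5Iterator

it = LH5Iterator(
    "data_*.lh5",          # wildcards are expanded
    "geds/raw",
    buffer_len=3200,       # keyword-only, as are all remaining options
    h5py_open_mode="r",    # "r" (default) or "a" for write-appending
)

for chunk in it:
    # the buffer is reused between iterations: copy the data if it
    # must outlive this loop body
    print(it.current_i_entry, len(chunk))
```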
lgdo/lh5/settings.py
ADDED
```diff
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+from typing import Any
+
+
+def default_hdf5_settings() -> dict[str, Any]:
+    """Returns the HDF5 settings for writing data to disk to the pydataobj defaults.
+
+    Examples
+    --------
+    >>> from lgdo import lh5
+    >>> lh5.DEFAULT_HDF5_SETTINGS["compression"] = "lzf"
+    >>> lh5.write(data, "data", "file.lh5")  # compressed with LZF
+    >>> lh5.DEFAULT_HDF5_SETTINGS = lh5.default_hdf5_settings()
+    >>> lh5.write(data, "data", "file.lh5", "of")  # compressed with default settings (GZIP)
+    """
+
+    return {
+        "shuffle": True,
+        "compression": "gzip",
+    }
+
+
+DEFAULT_HDF5_SETTINGS: dict[str, ...] = default_hdf5_settings()
+"""Global dictionary storing the default HDF5 settings for writing data to disk.
+
+Modify this global variable before writing data to disk with this package.
+
+Examples
+--------
+>>> from lgdo import lh5
+>>> lh5.DEFAULT_HDF5_SETTINGS["compression"] = "lzf"
+>>> lh5.write(data, "data", "file.lh5")  # compressed with LZF
+"""
```
lgdo/lh5/store.py
CHANGED
```diff
@@ -38,7 +38,10 @@ class LH5Store:
     """
 
     def __init__(
-        self,
+        self,
+        base_path: str | Path = "",
+        keep_open: bool = False,
+        locking: bool = False,
     ) -> None:
         """
         Parameters
@@ -52,6 +55,7 @@ class LH5Store:
         locking
             whether to lock files when reading
         """
+        base_path = str(Path(base_path)) if base_path != "" else ""
         self.base_path = "" if base_path == "" else utils.expand_path(base_path)
         self.keep_open = keep_open
         self.locking = locking
@@ -59,7 +63,7 @@ class LH5Store:
 
     def gimme_file(
         self,
-        lh5_file: str | h5py.File,
+        lh5_file: str | Path | h5py.File,
         mode: str = "r",
         page_buffer: int = 0,
         **file_kwargs,
@@ -83,6 +87,8 @@ class LH5Store:
         if isinstance(lh5_file, h5py.File):
             return lh5_file
 
+        lh5_file = str(Path(lh5_file))
+
         if mode == "r":
             lh5_file = utils.expand_path(lh5_file, base_path=self.base_path)
             file_kwargs["locking"] = self.locking
@@ -147,7 +153,7 @@ class LH5Store:
     def get_buffer(
         self,
         name: str,
-        lh5_file: str | h5py.File | Sequence[str | h5py.File],
+        lh5_file: str | Path | h5py.File | Sequence[str | Path | h5py.File],
         size: int | None = None,
         field_mask: Mapping[str, bool] | Sequence[str] | None = None,
     ) -> types.LGDO:
@@ -162,7 +168,7 @@ class LH5Store:
     def read(
         self,
         name: str,
-        lh5_file: str | h5py.File | Sequence[str | h5py.File],
+        lh5_file: str | Path | h5py.File | Sequence[str | Path | h5py.File],
         start_row: int = 0,
         n_rows: int = sys.maxsize,
         idx: ArrayLike = None,
@@ -180,7 +186,7 @@ class LH5Store:
         .lh5.core.read
         """
         # grab files from store
-        if isinstance(lh5_file, (str, h5py.File)):
+        if isinstance(lh5_file, (str, Path, h5py.File)):
             h5f = self.gimme_file(lh5_file, "r", **file_kwargs)
         else:
             h5f = [self.gimme_file(f, "r", **file_kwargs) for f in lh5_file]
@@ -201,7 +207,7 @@ class LH5Store:
         self,
         obj: types.LGDO,
         name: str,
-        lh5_file: str | h5py.File,
+        lh5_file: str | Path | h5py.File,
         group: str | h5py.Group = "/",
         start_row: int = 0,
         n_rows: int | None = None,
@@ -256,14 +262,14 @@ class LH5Store:
             **h5py_kwargs,
         )
 
-    def read_n_rows(self, name: str, lh5_file: str | h5py.File) -> int | None:
+    def read_n_rows(self, name: str, lh5_file: str | Path | h5py.File) -> int | None:
         """Look up the number of rows in an Array-like object called `name` in `lh5_file`.
 
         Return ``None`` if it is a :class:`.Scalar` or a :class:`.Struct`.
         """
         return utils.read_n_rows(name, self.gimme_file(lh5_file, "r"))
 
-    def read_size_in_bytes(self, name: str, lh5_file: str | h5py.File) -> int:
+    def read_size_in_bytes(self, name: str, lh5_file: str | Path | h5py.File) -> int:
         """Look up the size (in B) of the object in memory. Will recursively
         crawl through all objects in a Struct or Table
         """
```
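`LH5Store` follows the same pattern as `core`: `base_path` and all file arguments may now be `pathlib.Path` objects, normalized to strings internally before expansion. A brief sketch; the file name is made up:

```python
from pathlib import Path

from lgdo.lh5 import LH5Store

# keep_open caches file handles; base_path may now be a Path object
store = LH5Store(base_path=Path("~/legend/data"), keep_open=True)
f = store.gimme_file(Path("run001.lh5"), mode="r")  # returns an h5py.File
```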
lgdo/lh5/tools.py
CHANGED
```diff
@@ -3,6 +3,7 @@ from __future__ import annotations
 import fnmatch
 import logging
 from copy import copy
+from pathlib import Path
 
 import h5py
 
@@ -13,7 +14,7 @@ log = logging.getLogger(__name__)
 
 
 def ls(
-    lh5_file: str | h5py.Group,
+    lh5_file: str | Path | h5py.Group,
     lh5_group: str = "",
     recursive: bool = False,
 ) -> list[str]:
@@ -39,8 +40,8 @@ def ls(
 
     lh5_st = LH5Store()
     # To use recursively, make lh5_file a h5group instead of a string
-    if isinstance(lh5_file, str):
-        lh5_file = lh5_st.gimme_file(lh5_file, "r")
+    if isinstance(lh5_file, (str, Path)):
+        lh5_file = lh5_st.gimme_file(str(Path(lh5_file)), "r")
     if lh5_group.startswith("/"):
         lh5_group = lh5_group[1:]
 
@@ -75,7 +76,7 @@ def ls(
 
 
 def show(
-    lh5_file: str | h5py.Group,
+    lh5_file: str | Path | h5py.Group,
     lh5_group: str = "/",
     attrs: bool = False,
     indent: str = "",
@@ -121,8 +122,8 @@ def show(
         return
 
     # open file
-    if isinstance(lh5_file, str):
-        lh5_file = h5py.File(utils.expand_path(lh5_file), "r", locking=False)
+    if isinstance(lh5_file, (str, Path)):
+        lh5_file = h5py.File(utils.expand_path(Path(lh5_file)), "r", locking=False)
 
     # go to group
     if lh5_group != "/":
```
lgdo/lh5/utils.py
CHANGED
```diff
@@ -21,7 +21,7 @@ log = logging.getLogger(__name__)
 
 def get_buffer(
     name: str,
-    lh5_file: str | h5py.File | Sequence[str | h5py.File],
+    lh5_file: str | Path | h5py.File | Sequence[str | Path | h5py.File],
     size: int | None = None,
     field_mask: Mapping[str, bool] | Sequence[str] | None = None,
 ) -> types.LGDO:
@@ -39,7 +39,7 @@ def get_buffer(
     return obj
 
 
-def read_n_rows(name: str, h5f: str | h5py.File) -> int | None:
+def read_n_rows(name: str, h5f: str | Path | h5py.File) -> int | None:
     """Look up the number of rows in an Array-like LGDO object on disk.
 
     Return ``None`` if `name` is a :class:`.Scalar` or a :class:`.Struct`.
@@ -56,7 +56,7 @@ def read_n_rows(name: str, h5f: str | h5py.File) -> int | None:
     return _serializers.read.utils.read_n_rows(h5o, h5f.name, name)
 
 
-def read_size_in_bytes(name: str, h5f: str | h5py.File) -> int | None:
+def read_size_in_bytes(name: str, h5f: str | Path | h5py.File) -> int | None:
     """Look up the size (in B) in an LGDO object in memory. Will crawl
     recursively through members of a Struct or Table
     """
@@ -111,7 +111,7 @@ def get_h5_group(
         grp_attrs is not None
         and len(set(grp_attrs.items()) ^ set(group.attrs.items())) > 0
     ):
-        if not overwrite:
+        if not overwrite and len(group.attrs) != 0:
             msg = (
                 f"Provided {grp_attrs=} are different from "
                 f"existing ones {dict(group.attrs)=} but overwrite flag is not set"
@@ -158,10 +158,10 @@ def expand_vars(expr: str, substitute: dict[str, str] | None = None) -> str:
 
 
 def expand_path(
-    path: str,
+    path: str | Path,
     substitute: dict[str, str] | None = None,
     list: bool = False,
-    base_path: str | None = None,
+    base_path: str | Path | None = None,
 ) -> str | list:
     """Expand (environment) variables and wildcards to return absolute paths.
 
@@ -184,18 +184,26 @@ def expand_path(
         Unique absolute path, or list of all absolute paths
     """
     if base_path is not None and base_path != "":
-        base_path = Path(
-
+        base_path = Path(expand_vars(str(base_path))).expanduser()
+        if not Path(path).expanduser().is_absolute():
+            path = base_path / path
 
     # first expand variables
-    _path = expand_vars(path, substitute)
+    _path = expand_vars(str(path), substitute)
 
     # then expand wildcards
     # pathlib glob works differently so use glob for now
     paths = sorted(glob.glob(str(Path(_path).expanduser())))  # noqa: PTH207
 
     if base_path is not None and base_path != "":
-
+        rel_paths = []
+        for p in paths:
+            p_path = Path(p)
+            try:
+                rel_paths.append(str(p_path.relative_to(base_path)))
+            except ValueError:
+                rel_paths.append(str(p_path))
+        paths = rel_paths
 
     if not list:
         if len(paths) == 0:
```
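The rewritten tail of `expand_path` still makes globbed paths relative to `base_path`, but no longer fails when a path lies outside it: `Path.relative_to` raises `ValueError` in that case and the absolute path is kept. The behavior in isolation:

```python
from pathlib import Path

base_path = Path("/data/legend")
paths = ["/data/legend/run001.lh5", "/other/run002.lh5"]

rel_paths = []
for p in paths:
    p_path = Path(p)
    try:
        rel_paths.append(str(p_path.relative_to(base_path)))
    except ValueError:  # p is not under base_path: keep it as-is
        rel_paths.append(str(p_path))

print(rel_paths)  # ['run001.lh5', '/other/run002.lh5']
```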
lgdo/types/array.py
CHANGED
```diff
@@ -109,14 +109,14 @@ class Array(LGDOCollection):
 
     @property
     def shape(self):
-        return (len(self),
+        return (len(self), *self._nda.shape[1:])
 
     def reserve_capacity(self, capacity: int) -> None:
         "Set size (number of rows) of internal memory buffer"
         if capacity < len(self):
             msg = "Cannot reduce capacity below Array length"
             raise ValueError(msg)
-        self._nda.resize((capacity,
+        self._nda.resize((capacity, *self._nda.shape[1:]), refcheck=False)
 
     def get_capacity(self) -> int:
         "Get capacity (i.e. max size before memory must be re-allocated)"
@@ -190,6 +190,9 @@ class Array(LGDOCollection):
 
         return False
 
+    def __hash__(self):
+        return hash(self.name)
+
     def __iter__(self) -> Iterator:
         yield from self.nda
 
```
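A Python class that defines `__eq__` without `__hash__` gets its implicit `__hash__` set to `None`, so these LGDO collections had become unhashable; the new `__hash__` methods (here and in the encoded and vector-of-vectors types below) restore their use in sets and as dict keys. The underlying language rule, in a toy class unrelated to lgdo:

```python
class WithEqOnly:
    def __eq__(self, other):
        return isinstance(other, WithEqOnly)

class WithEqAndHash(WithEqOnly):
    def __hash__(self):
        return hash("fixed-name")

try:
    hash(WithEqOnly())
except TypeError as e:
    print(e)  # unhashable type: 'WithEqOnly'

print(hash(WithEqAndHash()) == hash("fixed-name"))  # True
```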
lgdo/types/encoded.py
CHANGED
```diff
@@ -92,6 +92,9 @@ class VectorOfEncodedVectors(LGDOCollection):
 
         return False
 
+    def __hash__(self):
+        return hash(self.name)
+
     def reserve_capacity(self, *capacity: int) -> None:
         self.encoded_data.reserve_capacity(*capacity)
         self.decoded_size.reserve_capacity(capacity[0])
@@ -345,6 +348,9 @@ class ArrayOfEncodedEqualSizedArrays(LGDOCollection):
 
         return False
 
+    def __hash__(self):
+        return hash(self.name)
+
     def reserve_capacity(self, *capacity: int) -> None:
         self.encoded_data.reserve_capacity(capacity)
 
```
lgdo/types/scalar.py
CHANGED
lgdo/types/struct.py
CHANGED
```diff
@@ -5,7 +5,9 @@ utilities.
 
 from __future__ import annotations
 
+import copy
 import logging
+import re
 from collections.abc import Mapping
 from typing import Any
 
@@ -56,7 +58,21 @@ class Struct(LGDO, dict):
             # assign
             super().update({k: v})
 
-        #
+        # check the datatype attribute passed by the user and sort the fields
+        # to ensure consistent behavior
+        if attrs is not None and "datatype" in attrs:
+            _attrs = copy.copy(dict(attrs))
+
+            if not _is_struct_datatype(self.datatype_name(), _attrs["datatype"]):
+                msg = (
+                    f"datatype attribute ({self.attrs['datatype']}) is not "
+                    f"compatible with class datatype!"
+                )
+                raise ValueError(msg)
+
+            _attrs["datatype"] = _sort_datatype_fields(_attrs["datatype"])
+            attrs = _attrs
+
         super().__init__(attrs)
 
     def datatype_name(self) -> str:
@@ -64,7 +80,10 @@ class Struct(LGDO, dict):
 
     def form_datatype(self) -> str:
         return (
-            self.datatype_name()
+            self.datatype_name()
+            + "{"
+            + ",".join(sorted([str(k) for k in self.keys()]))
+            + "}"
         )
 
     def update_datatype(self) -> None:
@@ -157,3 +176,34 @@ class Struct(LGDO, dict):
             "not possible. Call view_as() on the fields instead."
         )
         raise NotImplementedError(msg)
+
+
+def _is_struct_datatype(dt_name, expr):
+    return re.search("^" + dt_name + r"\{(.*)\}$", expr) is not None
+
+
+def _get_struct_fields(expr: str) -> list[str]:
+    assert _is_struct_datatype(".*", expr)
+
+    arr = re.search(r"\{(.*)\}$", expr).group(1).split(",")
+    if arr == [""]:
+        arr = []
+
+    return sorted(arr)
+
+
+def _struct_datatype_equal(dt_name, dt1, dt2):
+    if any(not _is_struct_datatype(dt_name, dt) for dt in (dt1, dt2)):
+        return False
+
+    return _get_struct_fields(dt1) == _get_struct_fields(dt2)
+
+
+def _sort_datatype_fields(expr):
+    assert _is_struct_datatype(".*", expr)
+
+    match = re.search(r"^(.*)\{.*\}$", expr)
+    struct_type = match.group(1)
+    fields = _get_struct_fields(expr)
+
+    return struct_type + "{" + ",".join(sorted([str(k) for k in fields])) + "}"
```
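With field names now sorted both at construction time and in `form_datatype`, the on-disk `datatype` attribute becomes deterministic regardless of insertion order. The new module-level helpers (private, leading-underscore functions) can be exercised standalone:

```python
from lgdo.types.struct import (
    _get_struct_fields,
    _is_struct_datatype,
    _sort_datatype_fields,
    _struct_datatype_equal,
)

dt = "struct{weights,binning,isdensity}"
print(_is_struct_datatype("struct", dt))  # True
print(_get_struct_fields(dt))             # ['binning', 'isdensity', 'weights']
print(_sort_datatype_fields(dt))          # struct{binning,isdensity,weights}
print(_struct_datatype_equal("struct", dt, "struct{binning,weights,isdensity}"))  # True
```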
lgdo/types/table.py
CHANGED
```diff
@@ -81,8 +81,9 @@ class Table(Struct, LGDOCollection):
         col_dict = _ak_to_lgdo_or_col_dict(col_dict)
 
         # call Struct constructor
-        Struct.__init__(self, obj_dict=col_dict)
-        LGDOCollection
+        Struct.__init__(self, obj_dict=col_dict, attrs=attrs)
+        # no need to call the LGDOCollection constructor, as we are calling the
+        # Struct constructor already
 
         # if col_dict is not empty, set size according to it
         # if size is also supplied, resize all fields to match it
@@ -329,9 +330,10 @@ class Table(Struct, LGDOCollection):
             :func:`numexpr.evaluate`` as `local_dict` argument or to
             :func:`eval` as `locals` argument.
         modules
-            a dictionary of additional modules used by the expression. If this
-            then :func:`eval`is used and the expression can
-
+            a dictionary of additional modules used by the expression. If this
+            is not `None` then :func:`eval`is used and the expression can
+            depend on any modules from this dictionary in addition to awkward
+            and numpy. These are passed to :func:`eval` as `globals` argument.
 
         Examples
         --------
@@ -402,7 +404,10 @@ class Table(Struct, LGDOCollection):
             return _make_lgdo(out_data)
 
         except Exception:
-            msg =
+            msg = (
+                f"Warning {expr} could not be evaluated with numexpr probably "
+                "due to some not allowed characters, trying with eval()."
+            )
             log.debug(msg)
 
             # resort to good ol' eval()
```
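The clarified docstring and the completed warning message describe the same control flow: `Table.eval` first tries `numexpr.evaluate` and falls back to plain `eval` when the expression uses syntax numexpr rejects or when `modules` is given. The fallback pattern in isolation, as a sketch rather than the actual lgdo implementation:

```python
from __future__ import annotations

import numpy as np


def eval_with_fallback(expr: str, local_dict: dict, modules: dict | None = None):
    """Try numexpr first; fall back to eval() on failure. Sketch only."""
    if modules is None:
        try:
            import numexpr

            return numexpr.evaluate(expr, local_dict=local_dict)
        except Exception:
            pass  # e.g. unsupported syntax: fall through to eval()
    # modules (if any) are passed as globals, the columns as locals
    return eval(expr, {"np": np, **(modules or {})}, local_dict)


a = np.arange(5)
print(eval_with_fallback("a + 1", {"a": a}))      # numexpr path
print(eval_with_fallback("np.sin(a)", {"a": a}))  # eval fallback
```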
lgdo/types/vectorofvectors.py
CHANGED
```diff
@@ -284,6 +284,9 @@ class VectorOfVectors(LGDOCollection):
 
         return False
 
+    def __hash__(self):
+        return hash(self.name)
+
     def __getitem__(self, i: int) -> NDArray:
         """Return a view of the vector at index `i` along the first axis."""
         if self.ndim == 2:
```
{legend_pydataobj-1.12.0a4.dist-info → legend_pydataobj-1.14.0.dist-info}/entry_points.txt
File without changes
{legend_pydataobj-1.12.0a4.dist-info → legend_pydataobj-1.14.0.dist-info}/licenses/LICENSE
File without changes
{legend_pydataobj-1.12.0a4.dist-info → legend_pydataobj-1.14.0.dist-info}/top_level.txt
File without changes