legend-pydataobj 1.9.0__py3-none-any.whl → 1.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.1.dist-info}/METADATA +2 -2
- legend_pydataobj-1.10.1.dist-info/RECORD +55 -0
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.1.dist-info}/WHEEL +1 -1
- lgdo/_version.py +2 -2
- lgdo/compression/radware.py +8 -16
- lgdo/lh5/_serializers/read/array.py +9 -9
- lgdo/lh5/_serializers/read/composite.py +67 -78
- lgdo/lh5/_serializers/read/encoded.py +31 -9
- lgdo/lh5/_serializers/read/ndarray.py +55 -42
- lgdo/lh5/_serializers/read/scalar.py +10 -3
- lgdo/lh5/_serializers/read/utils.py +165 -3
- lgdo/lh5/_serializers/read/vector_of_vectors.py +36 -14
- lgdo/lh5/_serializers/write/array.py +6 -1
- lgdo/lh5/_serializers/write/composite.py +14 -5
- lgdo/lh5/_serializers/write/scalar.py +6 -1
- lgdo/lh5/core.py +81 -7
- lgdo/lh5/exceptions.py +3 -3
- lgdo/lh5/iterator.py +258 -74
- lgdo/lh5/store.py +116 -12
- lgdo/lh5/tools.py +1 -1
- lgdo/lh5/utils.py +29 -44
- lgdo/types/histogram.py +122 -6
- lgdo/types/table.py +2 -2
- lgdo/types/vectorofvectors.py +1 -1
- legend_pydataobj-1.9.0.dist-info/RECORD +0 -55
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.1.dist-info}/LICENSE +0 -0
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.1.dist-info}/entry_points.txt +0 -0
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.1.dist-info}/top_level.txt +0 -0
lgdo/lh5/_serializers/read/ndarray.py

@@ -4,17 +4,21 @@ import logging
 import sys
 from bisect import bisect_left

+import h5py
 import numpy as np

 from ....types import Array
 from ... import datatype
 from ...exceptions import LH5DecodeError
+from .utils import read_attrs

 log = logging.getLogger(__name__)


 def _h5_read_ndarray(
     h5d,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -24,48 +28,48 @@ def _h5_read_ndarray(
 ):
     if obj_buf is not None and not isinstance(obj_buf, Array):
         msg = "object buffer is not an Array"
-        raise LH5DecodeError(msg,
+        raise LH5DecodeError(msg, fname, oname)

     # compute the number of rows to read
     # we culled idx above for start_row and n_rows, now we have to apply
     # the constraint of the length of the dataset
     try:
-        [old line 33 not shown in this diff view]
+        fspace = h5d.get_space()
+        ds_n_rows = fspace.shape[0]
     except AttributeError as e:
         msg = "does not seem to be an HDF5 dataset"
-        raise LH5DecodeError(msg,
+        raise LH5DecodeError(msg, fname, oname) from e

     if idx is not None:
-        if len(idx
+        if len(idx) > 0 and idx[-1] >= ds_n_rows:
             log.warning("idx indexed past the end of the array in the file. Culling...")
-            n_rows_to_read = bisect_left(idx
-            idx =
-            if len(idx
+            n_rows_to_read = bisect_left(idx, ds_n_rows)
+            idx = idx[:n_rows_to_read]
+            if len(idx) == 0:
                 log.warning("idx empty after culling.")
-        n_rows_to_read = len(idx
+        n_rows_to_read = len(idx)
     else:
         n_rows_to_read = ds_n_rows - start_row
-    [old lines 48-67 not shown in this diff view]
-    source_sel = np.s_[start_row : start_row + n_rows_to_read]
+    n_rows_to_read = min(n_rows_to_read, n_rows)
+
+    if idx is None:
+        fspace.select_hyperslab(
+            (start_row,) + (0,) * (h5d.rank - 1),
+            (1,) * h5d.rank,
+            None,
+            (n_rows_to_read,) + fspace.shape[1:],
+        )
+    elif use_h5idx:
+        # Note that h5s will automatically merge adjacent elements into a range
+        fspace.select_none()
+        for i in idx:
+            fspace.select_hyperslab(
+                (i,) + (0,) * (h5d.rank - 1),
+                (1,) * h5d.rank,
+                None,
+                (1,) + fspace.shape[1:],
+                h5py.h5s.SELECT_OR,
+            )

     # Now read the array
     if obj_buf is not None and n_rows_to_read > 0:
@@ -74,30 +78,39 @@ def _h5_read_ndarray(
         obj_buf.resize(buf_size)
         dest_sel = np.s_[obj_buf_start:buf_size]

-        [old lines 77-80 not shown in this diff view]
+        if idx is None or use_h5idx:
+            mspace = h5py.h5s.create_simple(obj_buf.nda.shape)
+            mspace.select_hyperslab(
+                (obj_buf_start,) + (0,) * (h5d.rank - 1),
+                (1,) * h5d.rank,
+                None,
+                (n_rows_to_read,) + fspace.shape[1:],
+            )
+            h5d.read(mspace, fspace, obj_buf.nda)
         else:
-            [old lines 82-84 not shown in this diff view]
+            tmp = np.empty(fspace.shape, h5d.dtype)
+            h5d.read(fspace, fspace, tmp)
+            obj_buf.nda[dest_sel, ...] = tmp[idx, ...]
         nda = obj_buf.nda
     elif n_rows == 0:
         tmp_shape = (0,) + h5d.shape[1:]
         nda = np.empty(tmp_shape, h5d.dtype)
-    elif change_idx_to_slice or idx is None or use_h5idx:
-        nda = h5d[source_sel]
     else:
-        [old line 92 not shown in this diff view]
-        nda = h5d
+        mspace = h5py.h5s.create_simple((n_rows_to_read,) + fspace.shape[1:])
+        nda = np.empty(mspace.shape, h5d.dtype)
+        if idx is None or use_h5idx:
+            h5d.read(mspace, fspace, nda)
+        else:
+            tmp = np.empty(fspace.shape, h5d.dtype)
+            h5d.read(fspace, fspace, tmp)
+            nda[:, ...] = tmp[idx, ...]

     # Finally, set attributes and return objects
-    attrs =
+    attrs = read_attrs(h5d, fname, oname)

     # special handling for bools
     # (c and Julia store as uint8 so cast to bool)
     if datatype.get_nested_datatype_string(attrs["datatype"]) == "bool":
-        nda = nda.astype(np.bool_)
+        nda = nda.astype(np.bool_, copy=False)

     return (nda, attrs, n_rows_to_read)
lgdo/lh5/_serializers/read/scalar.py

@@ -2,20 +2,27 @@ from __future__ import annotations

 import logging

+import h5py
 import numpy as np

 from ....types import Scalar
 from ...exceptions import LH5DecodeError
+from . import utils

 log = logging.getLogger(__name__)


 def _h5_read_scalar(
     h5d,
+    fname,
+    oname,
     obj_buf=None,
 ):
-    value =
-    [old line 18 not shown in this diff view]
+    value = np.empty((), h5d.dtype)
+    sp = h5py.h5s.create(h5py.h5s.SCALAR)
+    h5d.read(sp, sp, value)
+    value = value[()]
+    attrs = utils.read_attrs(h5d, fname, oname)

     # special handling for bools
     # (c and Julia store as uint8 so cast to bool)
@@ -25,7 +32,7 @@ def _h5_read_scalar(
     if obj_buf is not None:
         if not isinstance(obj_buf, Scalar):
             msg = "object buffer a Scalar"
-            raise LH5DecodeError(msg,
+            raise LH5DecodeError(msg, fname, oname)

         obj_buf.value = value
         obj_buf.attrs.update(attrs)
lgdo/lh5/_serializers/read/utils.py

@@ -1,12 +1,174 @@
 from __future__ import annotations

+import logging
+
+import h5py
+import numpy as np
+
+from .... import types
+from ... import datatype
 from ...exceptions import LH5DecodeError
+from . import scalar

+log = logging.getLogger(__name__)

-[old line 6 not shown in this diff view]
+
+def check_obj_buf_attrs(attrs, new_attrs, fname, oname):
     if set(attrs.keys()) != set(new_attrs.keys()):
         msg = (
             f"existing buffer and new data chunk have different attributes: "
-            f"obj_buf.attrs={attrs} != {
+            f"obj_buf.attrs={attrs} != {fname}[{oname}].attrs={new_attrs}"
         )
-        raise LH5DecodeError(msg,
+        raise LH5DecodeError(msg, fname, oname)
+
+
+def read_attrs(h5o, fname, oname):
+    """Read all attributes for an hdf5 dataset or group using low level API
+    and return them as a dict. Assume all are strings or scalar types."""
+    attrs = {}
+    for i_attr in range(h5py.h5a.get_num_attrs(h5o)):
+        h5a = h5py.h5a.open(h5o, index=i_attr)
+        name = h5a.get_name().decode()
+        if h5a.shape != ():
+            msg = f"attribute {oname} is not a string or scalar"
+            raise LH5DecodeError(msg, fname, oname)
+        val = np.empty((), h5a.dtype)
+        h5a.read(val)
+        if h5a.get_type().get_class() == h5py.h5t.STRING:
+            attrs[name] = val.item().decode()
+        else:
+            attrs[name] = val.item()
+        h5a.close()
+    return attrs
+
+
+def read_n_rows(h5o, fname, oname):
+    """Read number of rows in LH5 object"""
+    if not h5py.h5a.exists(h5o, b"datatype"):
+        msg = "missing 'datatype' attribute"
+        raise LH5DecodeError(msg, fname, oname)
+
+    h5a = h5py.h5a.open(h5o, b"datatype")
+    type_attr = np.empty((), h5a.dtype)
+    h5a.read(type_attr)
+    type_attr = type_attr.item().decode()
+    lgdotype = datatype.datatype(type_attr)
+
+    # scalars are dim-0 datasets
+    if lgdotype is types.Scalar:
+        return None
+
+    # structs don't have rows
+    if lgdotype is types.Struct:
+        return None
+
+    # tables should have elements with all the same length
+    if lgdotype is types.Table:
+        # read out each of the fields
+        rows_read = None
+        for field in datatype.get_struct_fields(type_attr):
+            obj = h5py.h5o.open(h5o, field.encode())
+            n_rows_read = read_n_rows(obj, fname, field)
+            obj.close()
+            if not rows_read:
+                rows_read = n_rows_read
+            elif rows_read != n_rows_read:
+                log.warning(
+                    f"'{field}' field in table '{oname}' has {rows_read} rows, "
+                    f"{n_rows_read} was expected"
+                )
+
+        return rows_read
+
+    # length of vector of vectors is the length of its cumulative_length
+    if lgdotype is types.VectorOfVectors:
+        obj = h5py.h5o.open(h5o, b"cumulative_length")
+        n_rows = read_n_rows(obj, fname, "cumulative_length")
+        obj.close()
+        return n_rows
+
+    # length of vector of encoded vectors is the length of its decoded_size
+    if lgdotype in (types.VectorOfEncodedVectors, types.ArrayOfEncodedEqualSizedArrays):
+        obj = h5py.h5o.open(h5o, b"encoded_data")
+        n_rows = read_n_rows(obj, fname, "encoded_data")
+        obj.close()
+        return n_rows
+
+    # return array length (without reading the array!)
+    if issubclass(lgdotype, types.Array):
+        # compute the number of rows to read
+        return h5o.get_space().shape[0]
+
+    msg = f"don't know how to read rows of LGDO {lgdotype.__name__}"
+    raise LH5DecodeError(msg, fname, oname)
+
+
+def read_size_in_bytes(h5o, fname, oname, field_mask=None):
+    """Read number size in LH5 object in memory (in B)"""
+    if not h5py.h5a.exists(h5o, b"datatype"):
+        msg = "missing 'datatype' attribute"
+        raise LH5DecodeError(msg, fname, oname)
+
+    h5a = h5py.h5a.open(h5o, b"datatype")
+    type_attr = np.empty((), h5a.dtype)
+    h5a.read(type_attr)
+    type_attr = type_attr.item().decode()
+    lgdotype = datatype.datatype(type_attr)
+
+    # scalars are dim-0 datasets
+    if lgdotype in (
+        types.Scalar,
+        types.Array,
+        types.ArrayOfEqualSizedArrays,
+        types.FixedSizeArray,
+    ):
+        return int(np.prod(h5o.shape) * h5o.dtype.itemsize)
+
+    # structs don't have rows
+    if lgdotype in (types.Struct, types.Histogram, types.Histogram.Axis):
+        size = 0
+        for key in h5o:
+            obj = h5py.h5o.open(h5o, key)
+            size += read_size_in_bytes(obj, fname, oname, field_mask)
+            obj.close()
+        return size
+
+    # tables should have elements with all the same length
+    if lgdotype in (types.Table, types.WaveformTable):
+        # read out each of the fields
+        size = 0
+        if not field_mask:
+            field_mask = datatype.get_struct_fields(type_attr)
+        for field in field_mask:
+            obj = h5py.h5o.open(h5o, field.encode())
+            size += read_size_in_bytes(obj, fname, field)
+            obj.close()
+        return size
+
+    # length of vector of vectors is the length of its cumulative_length
+    if lgdotype is types.VectorOfVectors:
+        size = 0
+        obj = h5py.h5o.open(h5o, b"cumulative_length")
+        size += read_size_in_bytes(obj, fname, "cumulative_length")
+        obj.close()
+        obj = h5py.h5o.open(h5o, b"flattened_data")
+        size += read_size_in_bytes(obj, fname, "flattened_data")
+        obj.close()
+        return size
+
+    # length of vector of encoded vectors is the length of its decoded_size
+    if lgdotype is types.ArrayOfEncodedEqualSizedArrays:
+        obj = h5py.h5o.open(h5o, b"decoded_size")
+        size = scalar._h5_read_scalar(obj, fname, "decoded_size")[0].value
+        obj.close()
+
+        obj = h5py.h5o.open(h5o, b"encoded_data")
+        cl = h5py.h5o.open(obj, b"cumulative_length")
+        size *= cl.shape[0]
+        size *= 4  # TODO: UPDATE WHEN CODECS SUPPORT MORE DTYPES
+        obj.close()
+
+        return size
+
+    msg = f"don't know how to read size of LGDO {lgdotype.__name__}"
+    raise LH5DecodeError(msg, fname, oname)
lgdo/lh5/_serializers/read/vector_of_vectors.py

@@ -3,6 +3,7 @@ from __future__ import annotations
 import logging
 import sys

+import h5py
 import numba
 import numpy as np

@@ -15,12 +16,15 @@ from ...exceptions import LH5DecodeError
 from .array import (
     _h5_read_array,
 )
+from .utils import read_attrs

 log = logging.getLogger(__name__)


 def _h5_read_vector_of_vectors(
     h5g,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -30,12 +34,15 @@ def _h5_read_vector_of_vectors(
 ):
     if obj_buf is not None and not isinstance(obj_buf, VectorOfVectors):
         msg = "object buffer is not a VectorOfVectors"
-        raise LH5DecodeError(msg,
+        raise LH5DecodeError(msg, fname, oname)

     # read out cumulative_length
     cumulen_buf = None if obj_buf is None else obj_buf.cumulative_length
+    h5d_cl = h5py.h5d.open(h5g, b"cumulative_length")
     cumulative_length, n_rows_read = _h5_read_array(
-        [old line 38 not shown in this diff view]
+        h5d_cl,
+        fname,
+        f"{oname}/cumulative_length",
         start_row=start_row,
         n_rows=n_rows,
         idx=idx,
@@ -51,17 +58,19 @@ def _h5_read_vector_of_vectors(
     if idx is not None and n_rows_read > 0:
         # get the starting indices for each array in flattened data:
         # the starting index for array[i] is cumulative_length[i-1]
-        idx2 =
+        idx2 = np.asarray(idx).copy() - 1

         # re-read cumulative_length with these indices
         # note this will allocate memory for fd_starts!
         fd_start = None
-        if idx2[0]
-            idx2 =
+        if idx2[0] == -1:
+            idx2 = idx2[1:]
             fd_start = 0  # this variable avoids an ndarray append

         fd_starts, fds_n_rows_read = _h5_read_array(
-            [old line 64 not shown in this diff view]
+            h5d_cl,
+            fname,
+            f"{oname}/cumulative_length",
             start_row=start_row,
             n_rows=n_rows,
             idx=idx2,
@@ -98,7 +107,11 @@ def _h5_read_vector_of_vectors(
         # need to read out the cumulen sample -before- the first sample
         # read above in order to get the starting row of the first
         # vector to read out in flattened_data
-        [old line 101 not shown in this diff view]
+        fspace = h5d_cl.get_space()
+        fspace.select_elements([[start_row - 1]])
+        mspace = h5py.h5s.create(h5py.h5s.SCALAR)
+        fd_start = np.empty((), h5d_cl.dtype)
+        h5d_cl.read(mspace, fspace, fd_start)

     # check limits for values that will be used subsequently
     if this_cumulen_nda[-1] < fd_start:
@@ -112,7 +125,7 @@ def _h5_read_vector_of_vectors(
             f"cumulative_length non-increasing between entries "
             f"{start_row} and {start_row+n_rows_read}"
         )
-        raise LH5DecodeError(msg,
+        raise LH5DecodeError(msg, fname, oname)

     # determine the number of rows for the flattened_data readout
     fd_n_rows = this_cumulen_nda[-1] if n_rows_read > 0 else 0
@@ -126,6 +139,8 @@ def _h5_read_vector_of_vectors(
     # read for flattened_data
     this_cumulen_nda -= fd_start

+    h5d_cl.close()
+
     # If we started with a partially-filled buffer, add the
     # appropriate offset for the start of the in-memory flattened
     # data for this read.
@@ -141,20 +156,26 @@ def _h5_read_vector_of_vectors(
     # grow fd_buf if necessary to hold the data
     fdb_size = fd_buf_start + fd_n_rows
     if len(fd_buf) < fdb_size:
-        fd_buf.resize(fdb_size)
+        fd_buf.nda.resize(fdb_size, refcheck=False)

     # now read
-    [old line 147 not shown in this diff view]
+    h5o = h5py.h5o.open(h5g, b"flattened_data")
+    h5a_dtype = h5py.h5a.open(h5o, b"datatype")
+    val = np.empty((), "O")
+    h5a_dtype.read(val)
+    lgdotype = dtypeutils.datatype(val.item().decode())
     if lgdotype is Array:
         _func = _h5_read_array
     elif lgdotype is VectorOfVectors:
         _func = _h5_read_vector_of_vectors
     else:
         msg = "type {lgdotype.__name__} is not supported"
-        raise LH5DecodeError(msg,
+        raise LH5DecodeError(msg, fname, f"{oname}/flattened_data")

     flattened_data, _ = _func(
-        [old line 157 not shown in this diff view]
+        h5o,
+        fname,
+        f"{oname}/flattened_data",
         start_row=fd_start,
         n_rows=fd_n_rows,
         idx=fd_idx,
@@ -162,6 +183,7 @@ def _h5_read_vector_of_vectors(
         obj_buf=fd_buf,
         obj_buf_start=fd_buf_start,
     )
+    h5o.close()

     if obj_buf is not None:
         # if the buffer is partially filled, cumulative_length will be invalid
@@ -176,7 +198,7 @@ def _h5_read_vector_of_vectors(
         VectorOfVectors(
             flattened_data=flattened_data,
             cumulative_length=cumulative_length,
-            attrs=
+            attrs=read_attrs(h5g, fname, oname),
         ),
         n_rows_read,
     )
@@ -194,4 +216,4 @@ def _make_fd_idx(starts, stops, idx):
         for i in range(starts[j], stops[j]):
             idx[k] = i
             k += 1
-    return
+    return idx
lgdo/lh5/_serializers/write/array.py

@@ -71,7 +71,12 @@ def _h5_write_array(
     _attrs = obj.getattrs(datatype=True)
     _attrs.pop("compression", None)
     _attrs.pop("hdf5_settings", None)
-    ds.attrs.update(
+    ds.attrs.update(
+        {
+            k: v.encode("utf-8") if isinstance(v, str) else v
+            for k, v in _attrs.items()
+        }
+    )

     return

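The only change here (mirrored in write/scalar.py below) is that string attribute values are encoded to UTF-8 bytes before ds.attrs.update(), presumably so h5py stores them as fixed-length byte strings instead of variable-length str attributes. A small sketch of the pattern with made-up attributes (not package code):

# sketch only: encode str attribute values before writing them
import h5py

_attrs = {"datatype": "array<1>{real}", "units": "ns", "n": 42}

with h5py.File("example.h5", "w") as f:
    ds = f.create_dataset("data", data=[1.0, 2.0, 3.0])
    ds.attrs.update(
        {k: v.encode("utf-8") if isinstance(v, str) else v for k, v in _attrs.items()}
    )
    print(dict(ds.attrs))  # string values round-trip as bytes here, e.g. b'ns'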
lgdo/lh5/_serializers/write/composite.py

@@ -1,6 +1,8 @@
 from __future__ import annotations

 import logging
+import os
+from inspect import signature

 import h5py

@@ -27,6 +29,10 @@ def _h5_write_lgdo(
 ):
     assert isinstance(obj, types.LGDO)

+    file_kwargs = {
+        k: h5py_kwargs[k] for k in h5py_kwargs & signature(h5py.File).parameters.keys()
+    }
+    h5py_kwargs = {k: h5py_kwargs[k] for k in h5py_kwargs - file_kwargs.keys()}
     if wo_mode == "write_safe":
         wo_mode = "w"
     if wo_mode == "append":
@@ -46,10 +52,9 @@ def _h5_write_lgdo(
     # In hdf5, 'a' is really "modify" -- in addition to appending, you can
     # change any object in the file. So we use file:append for
     # write_object:overwrite.
-    mode = "w" if wo_mode == "of" else "a"
-
     if not isinstance(lh5_file, h5py.File):
-        [old line 52 not shown in this diff view]
+        mode = "w" if wo_mode == "of" or not os.path.exists(lh5_file) else "a"
+        lh5_file = h5py.File(lh5_file, mode=mode, **file_kwargs)

     log.debug(
         f"writing {obj!r}[{start_row}:{n_rows}] as "
@@ -65,8 +70,12 @@ def _h5_write_lgdo(

     # struct, table, waveform table or histogram.
     if isinstance(obj, types.Struct):
-        if
-        [old line 69 not shown in this diff view]
+        if (
+            isinstance(obj, types.Histogram)
+            and wo_mode not in ["w", "o", "of"]
+            and name in group
+        ):
+            msg = f"can't append-write to histogram in wo_mode '{wo_mode}'"
             raise LH5EncodeError(msg, lh5_file, group, name)
         if isinstance(obj, types.Histogram) and write_start != 0:
             msg = f"can't write histogram in wo_mode '{wo_mode}' with write_start != 0"
lgdo/lh5/_serializers/write/scalar.py

@@ -20,4 +20,9 @@ def _h5_write_scalar(obj, name, lh5_file, group="/", wo_mode="append"):
         raise LH5EncodeError(msg, lh5_file, group, name)

     ds = group.create_dataset(name, shape=(), data=obj.value)
-    ds.attrs.update(
+    ds.attrs.update(
+        {
+            k: v.encode("utf-8") if isinstance(v, str) else v
+            for k, v in obj.attrs.items()
+        }
+    )