legend-pydataobj 1.5.1__py3-none-any.whl → 1.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {legend_pydataobj-1.5.1.dist-info → legend_pydataobj-1.6.1.dist-info}/METADATA +1 -1
- legend_pydataobj-1.6.1.dist-info/RECORD +54 -0
- {legend_pydataobj-1.5.1.dist-info → legend_pydataobj-1.6.1.dist-info}/WHEEL +1 -1
- {legend_pydataobj-1.5.1.dist-info → legend_pydataobj-1.6.1.dist-info}/entry_points.txt +1 -0
- lgdo/__init__.py +7 -4
- lgdo/_version.py +2 -2
- lgdo/cli.py +237 -12
- lgdo/compression/__init__.py +1 -0
- lgdo/lh5/__init__.py +9 -1
- lgdo/lh5/_serializers/__init__.py +43 -0
- lgdo/lh5/_serializers/read/__init__.py +0 -0
- lgdo/lh5/_serializers/read/array.py +34 -0
- lgdo/lh5/_serializers/read/composite.py +405 -0
- lgdo/lh5/_serializers/read/encoded.py +129 -0
- lgdo/lh5/_serializers/read/ndarray.py +104 -0
- lgdo/lh5/_serializers/read/scalar.py +34 -0
- lgdo/lh5/_serializers/read/utils.py +12 -0
- lgdo/lh5/_serializers/read/vector_of_vectors.py +201 -0
- lgdo/lh5/_serializers/write/__init__.py +0 -0
- lgdo/lh5/_serializers/write/array.py +92 -0
- lgdo/lh5/_serializers/write/composite.py +259 -0
- lgdo/lh5/_serializers/write/scalar.py +23 -0
- lgdo/lh5/_serializers/write/vector_of_vectors.py +95 -0
- lgdo/lh5/core.py +272 -0
- lgdo/lh5/datatype.py +46 -0
- lgdo/lh5/exceptions.py +34 -0
- lgdo/lh5/iterator.py +1 -1
- lgdo/lh5/store.py +69 -1160
- lgdo/lh5/tools.py +27 -53
- lgdo/lh5/utils.py +130 -27
- lgdo/lh5_store.py +11 -2
- lgdo/logging.py +1 -0
- lgdo/types/__init__.py +1 -0
- lgdo/types/array.py +1 -0
- lgdo/types/arrayofequalsizedarrays.py +1 -0
- lgdo/types/encoded.py +3 -8
- lgdo/types/fixedsizearray.py +1 -0
- lgdo/types/struct.py +1 -0
- lgdo/types/table.py +46 -5
- lgdo/types/vectorofvectors.py +314 -458
- lgdo/types/vovutils.py +320 -0
- lgdo/types/waveformtable.py +1 -0
- lgdo/utils.py +1 -32
- legend_pydataobj-1.5.1.dist-info/RECORD +0 -36
- {legend_pydataobj-1.5.1.dist-info → legend_pydataobj-1.6.1.dist-info}/LICENSE +0 -0
- {legend_pydataobj-1.5.1.dist-info → legend_pydataobj-1.6.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,201 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import logging
|
4
|
+
import sys
|
5
|
+
|
6
|
+
import numba
|
7
|
+
import numpy as np
|
8
|
+
|
9
|
+
from ....types import (
|
10
|
+
Array,
|
11
|
+
VectorOfVectors,
|
12
|
+
)
|
13
|
+
from ... import datatype as dtypeutils
|
14
|
+
from ...exceptions import LH5DecodeError
|
15
|
+
from .array import (
|
16
|
+
_h5_read_array,
|
17
|
+
)
|
18
|
+
|
19
|
+
log = logging.getLogger(__name__)
|
20
|
+
|
21
|
+
|
22
|
+
def _h5_read_vector_of_vectors(
    name,
    h5f,
    start_row=0,
    n_rows=sys.maxsize,
    idx=None,
    use_h5idx=False,
    obj_buf=None,
    obj_buf_start=0,
):
    """Read a ``VectorOfVectors`` from ``h5f[name]``.

    Reads the ``cumulative_length`` and ``flattened_data`` sub-objects and
    assembles them into a :class:`VectorOfVectors`, or fills ``obj_buf`` in
    place when one is provided.

    Parameters
    ----------
    name
        path of the HDF5 group holding the vector-of-vectors.
    h5f
        open HDF5 file object (or mapping of datasets).
    start_row
        first vector-of-vectors row to read.
    n_rows
        maximum number of rows to read.
    idx
        optional fancy-indexing selection, as a length-1 tuple of row
        indices (numpy.take-style) relative to ``start_row``.
    use_h5idx
        forwarded to the array reader; presumably lets HDF5 apply the
        selection itself — confirm against ``_h5_read_array``.
    obj_buf
        optional pre-allocated :class:`VectorOfVectors` to fill.
    obj_buf_start
        first row of ``obj_buf`` to fill.

    Returns
    -------
    (obj, n_rows_read)
        the read object (``obj_buf`` if given) and the number of
        vector-of-vectors rows actually read.

    Raises
    ------
    LH5DecodeError
        if ``obj_buf`` has the wrong type, if ``cumulative_length`` is
        non-increasing, or if ``flattened_data`` has an unsupported
        datatype.
    """
    if obj_buf is not None and not isinstance(obj_buf, VectorOfVectors):
        msg = "object buffer is not a VectorOfVectors"
        raise LH5DecodeError(msg, h5f, name)

    # read out cumulative_length
    cumulen_buf = None if obj_buf is None else obj_buf.cumulative_length
    cumulative_length, n_rows_read = _h5_read_array(
        f"{name}/cumulative_length",
        h5f,
        start_row=start_row,
        n_rows=n_rows,
        idx=idx,
        use_h5idx=use_h5idx,
        obj_buf=cumulen_buf,
        obj_buf_start=obj_buf_start,
    )
    # get a view of just what was read out for cleaner code below
    this_cumulen_nda = cumulative_length.nda[
        obj_buf_start : obj_buf_start + n_rows_read
    ]

    if idx is not None and n_rows_read > 0:
        # get the starting indices for each array in flattened data:
        # the starting index for array[i] is cumulative_length[i-1]
        idx2 = (np.asarray(idx[0]).copy() - 1,)

        # re-read cumulative_length with these indices
        # note this will allocate memory for fd_starts!
        fd_start = None
        if idx2[0][0] == -1:
            idx2 = (idx2[0][1:],)
            fd_start = 0  # this variable avoids an ndarray append

        # second return value (rows read) is not needed here
        fd_starts, _ = _h5_read_array(
            f"{name}/cumulative_length",
            h5f,
            start_row=start_row,
            n_rows=n_rows,
            idx=idx2,
            use_h5idx=use_h5idx,
            obj_buf=None,
        )
        fd_starts = fd_starts.nda  # we just need the nda
        if fd_start is None:
            fd_start = fd_starts[0]

        # compute the length that flattened_data will have after the
        # fancy-indexed read
        fd_n_rows = np.sum(this_cumulen_nda[-len(fd_starts) :] - fd_starts)
        if fd_start == 0:
            fd_n_rows += this_cumulen_nda[0]

        # now make fd_idx
        fd_idx = np.empty(fd_n_rows, dtype="int32")
        fd_idx = _make_fd_idx(fd_starts, this_cumulen_nda, fd_idx)

        # Now clean up this_cumulen_nda, to be ready
        # to match the in-memory version of flattened_data. Note: these
        # operations on the view change the original array because they are
        # numpy arrays, not lists.
        this_cumulen_nda[-len(fd_starts) :] -= fd_starts
        np.cumsum(this_cumulen_nda, out=this_cumulen_nda)

    else:
        fd_idx = None

        # determine the start_row and n_rows for the flattened_data readout
        fd_start = 0
        if start_row > 0 and n_rows_read > 0:
            # need to read out the cumulen sample -before- the first sample
            # read above in order to get the starting row of the first
            # vector to read out in flattened_data
            fd_start = h5f[f"{name}/cumulative_length"][start_row - 1]

            # check limits for values that will be used subsequently
            if this_cumulen_nda[-1] < fd_start:
                log.debug(
                    f"this_cumulen_nda[-1] = {this_cumulen_nda[-1]}, "
                    f"fd_start = {fd_start}, "
                    f"start_row = {start_row}, "
                    f"n_rows_read = {n_rows_read}"
                )
                msg = (
                    f"cumulative_length non-increasing between entries "
                    f"{start_row} and {start_row+n_rows_read}"
                )
                raise LH5DecodeError(msg, h5f, name)

        # determine the number of rows for the flattened_data readout
        fd_n_rows = this_cumulen_nda[-1] if n_rows_read > 0 else 0

        # Now done with this_cumulen_nda, so we can clean it up to be ready
        # to match the in-memory version of flattened_data. Note: these
        # operations on the view change the original array because they are
        # numpy arrays, not lists.
        #
        # First we need to subtract off the in-file offset for the start of
        # read for flattened_data
        this_cumulen_nda -= fd_start

    # If we started with a partially-filled buffer, add the
    # appropriate offset for the start of the in-memory flattened
    # data for this read.
    fd_buf_start = np.uint32(0)
    if obj_buf_start > 0:
        fd_buf_start = cumulative_length.nda[obj_buf_start - 1]
        this_cumulen_nda += fd_buf_start

    # Now prepare the object buffer if necessary
    fd_buf = None
    if obj_buf is not None:
        fd_buf = obj_buf.flattened_data
        # grow fd_buf if necessary to hold the data
        fdb_size = fd_buf_start + fd_n_rows
        if len(fd_buf) < fdb_size:
            fd_buf.resize(fdb_size)

    # now read flattened_data, dispatching on its declared LGDO datatype
    lgdotype = dtypeutils.datatype(h5f[f"{name}/flattened_data"].attrs["datatype"])
    if lgdotype is Array:
        _func = _h5_read_array
    elif lgdotype is VectorOfVectors:
        _func = _h5_read_vector_of_vectors
    else:
        # FIX: this message was previously a plain string literal missing the
        # f prefix, so the placeholder was never interpolated
        msg = f"type {lgdotype.__name__} is not supported"
        raise LH5DecodeError(msg, h5f, f"{name}/flattened_data")

    flattened_data, _ = _func(
        f"{name}/flattened_data",
        h5f,
        start_row=fd_start,
        n_rows=fd_n_rows,
        idx=fd_idx,
        use_h5idx=use_h5idx,
        obj_buf=fd_buf,
        obj_buf_start=fd_buf_start,
    )

    if obj_buf is not None:
        # if the buffer is partially filled, cumulative_length will be invalid
        # (i.e. non monotonically increasing). Let's fix that by filling the
        # rest of the array with the length of flattened_data
        end = obj_buf_start + n_rows_read
        obj_buf.cumulative_length.nda[end:] = obj_buf.cumulative_length.nda[end - 1]

        return obj_buf, n_rows_read

    return (
        VectorOfVectors(
            flattened_data=flattened_data,
            cumulative_length=cumulative_length,
            attrs=h5f[name].attrs,
        ),
        n_rows_read,
    )
|
187
|
+
|
188
|
+
|
189
|
+
@numba.njit(parallel=False, fastmath=True)
def _make_fd_idx(starts, stops, idx):
    # Fill the pre-allocated `idx` array with the flattened_data row indices
    # selected by a fancy-indexed read: for each vector j, the half-open
    # range starts[j]..stops[j]. If there is one more stop than there are
    # starts, the first vector implicitly begins at row 0 (range(stops[0])).
    # Returns a length-1 tuple so it can be passed on as an `idx` selection.
    k = 0  # write cursor into idx
    if len(starts) < len(stops):
        # leading vector starting at row 0 (its start was dropped earlier)
        for i in range(stops[0]):
            idx[k] = i
            k += 1
        stops = stops[1:]
    for j in range(len(starts)):
        for i in range(starts[j], stops[j]):
            idx[k] = i
            k += 1
    return (idx,)
|
File without changes
|
@@ -0,0 +1,92 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import logging
|
4
|
+
|
5
|
+
import h5py
|
6
|
+
import numpy as np
|
7
|
+
|
8
|
+
from .... import types
|
9
|
+
from ...exceptions import LH5EncodeError
|
10
|
+
|
11
|
+
log = logging.getLogger(__name__)
|
12
|
+
|
13
|
+
# Default h5py dataset-creation options, applied only when the caller has not
# overridden the corresponding keyword (byte-shuffle filter + gzip).
DEFAULT_HDF5_SETTINGS: dict[str, ...] = {"shuffle": True, "compression": "gzip"}
|
14
|
+
|
15
|
+
|
16
|
+
def _h5_write_array(
    obj,
    name,
    lh5_file,
    group="/",
    start_row=0,
    n_rows=None,
    wo_mode="append",
    write_start=0,
    **h5py_kwargs,
):
    """Write an LGDO :class:`~.types.Array` as an HDF5 dataset ``group[name]``.

    Creates a new (resizable, chunked) dataset when writing fresh, otherwise
    resizes the existing dataset and writes into it starting at
    ``write_start``.

    Parameters
    ----------
    obj
        the Array to write; only ``obj.nda[start_row:start_row + n_rows]``
        is written.
    name
        dataset name within ``group``.
    lh5_file
        file handle, used here only for error reporting.
    group
        assumed to be an h5py group-like object supporting ``in``,
        item access/deletion and ``create_dataset`` — note the default
        ``"/"`` string appears to rely on the caller resolving it first;
        confirm against callers.
    start_row, n_rows
        slice of ``obj.nda`` to write; ``n_rows=None`` means "to the end".
    wo_mode
        single-letter write mode ("a" append, "o" overwrite, ...).
    write_start
        first row of the on-disk dataset to write to.
    h5py_kwargs
        forwarded to ``create_dataset`` (compression etc.).

    Raises
    ------
    LH5EncodeError
        if an existing object with this name is not a dataset.
    """
    assert isinstance(obj, types.Array)

    # clamp n_rows to what is actually available after start_row
    if n_rows is None or n_rows > obj.nda.shape[0] - start_row:
        n_rows = obj.nda.shape[0] - start_row

    nda = obj.nda[start_row : start_row + n_rows]

    # hack to store bools as uint8 for c / Julia compliance
    if nda.dtype.name == "bool":
        nda = nda.astype(np.uint8)

    # need to create dataset from ndarray the first time for speed
    # creating an empty dataset and appending to that is super slow!
    if (wo_mode != "a" and write_start == 0) or name not in group:
        # this is needed in order to have a resizable (in the first
        # axis) data set, i.e. rows can be appended later
        # NOTE: this automatically turns chunking on!
        maxshape = (None,) + nda.shape[1:]
        h5py_kwargs.setdefault("maxshape", maxshape)

        if wo_mode == "o" and name in group:
            log.debug(f"overwriting {name} in {group}")
            del group[name]

        # set default compression options
        for k, v in DEFAULT_HDF5_SETTINGS.items():
            h5py_kwargs.setdefault(k, v)

        # compress using the 'compression' LGDO attribute, if available
        if "compression" in obj.attrs:
            comp_algo = obj.attrs["compression"]
            if isinstance(comp_algo, dict):
                # dict of h5py keyword arguments, merge wholesale
                h5py_kwargs |= obj.attrs["compression"]
            else:
                # plain algorithm name/id
                h5py_kwargs["compression"] = obj.attrs["compression"]

        # and even the 'hdf5_settings' one, preferred
        # (applied last, so it wins over 'compression')
        if "hdf5_settings" in obj.attrs:
            h5py_kwargs |= obj.attrs["hdf5_settings"]

        # create HDF5 dataset
        ds = group.create_dataset(name, data=nda, **h5py_kwargs)

        # attach HDF5 dataset attributes, but not "compression"!
        # (those are write-time hints, not part of the stored object)
        _attrs = obj.getattrs(datatype=True)
        _attrs.pop("compression", None)
        _attrs.pop("hdf5_settings", None)
        ds.attrs.update(_attrs)

        return

    # Now append or overwrite
    ds = group[name]
    if not isinstance(ds, h5py.Dataset):
        msg = (
            f"existing HDF5 object '{name}' in group '{group}'"
            " is not a dataset! Cannot overwrite or append"
        )
        raise LH5EncodeError(msg, lh5_file, group, name)

    # in append mode, write after the current end of the dataset
    old_len = ds.shape[0]
    if wo_mode == "a":
        write_start = old_len

    ds.resize(write_start + nda.shape[0], axis=0)
    ds[write_start:] = nda
|
@@ -0,0 +1,259 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import logging
|
4
|
+
|
5
|
+
import h5py
|
6
|
+
|
7
|
+
from .... import compression, types
|
8
|
+
from ... import datatype, utils
|
9
|
+
from ...exceptions import LH5EncodeError
|
10
|
+
from .array import _h5_write_array
|
11
|
+
from .scalar import _h5_write_scalar
|
12
|
+
from .vector_of_vectors import _h5_write_vector_of_vectors
|
13
|
+
|
14
|
+
log = logging.getLogger(__name__)
|
15
|
+
|
16
|
+
|
17
|
+
def _h5_write_lgdo(
    obj,
    name,
    lh5_file,
    group="/",
    start_row=0,
    n_rows=None,
    wo_mode="append",
    write_start=0,
    **h5py_kwargs,
):
    """Write any LGDO ``obj`` to ``lh5_file`` as ``group/name``.

    Top-level serialization entry point: normalizes ``wo_mode`` to its
    single/double-letter form, opens the file if a path-like was passed,
    then dispatches to the type-specific writer (struct/table, scalar,
    encoded vectors, vector-of-vectors, array).

    Parameters
    ----------
    obj
        the LGDO to write.
    name
        name of the object within ``group``.
    lh5_file
        an open :class:`h5py.File` or something accepted by its constructor
        (e.g. a filename).
    group
        group path (or group object) under which to write.
    start_row, n_rows
        row range of ``obj`` to write.
    wo_mode
        one of ``write_safe``/``w``, ``append``/``a``, ``overwrite``/``o``,
        ``overwrite_file``/``of``, ``append_column``/``ac``.
    write_start
        first on-disk row to write to (reset to 0 for ``overwrite_file``).
    h5py_kwargs
        forwarded to dataset creation.

    Raises
    ------
    LH5EncodeError
        for an unknown ``wo_mode``, a forbidden overwrite in ``write_safe``
        mode, or an unsupported object type.
    """
    assert isinstance(obj, types.LGDO)

    # normalize long-form mode names to their short codes
    if wo_mode == "write_safe":
        wo_mode = "w"
    if wo_mode == "append":
        wo_mode = "a"
    if wo_mode == "overwrite":
        wo_mode = "o"
    if wo_mode == "overwrite_file":
        wo_mode = "of"
        write_start = 0
    if wo_mode == "append_column":
        wo_mode = "ac"
    if wo_mode not in ["w", "a", "o", "of", "ac"]:
        msg = f"unknown wo_mode '{wo_mode}'"
        raise LH5EncodeError(msg, lh5_file, group, name)

    # "mode" is for the h5df.File and wo_mode is for this function
    # In hdf5, 'a' is really "modify" -- in addition to appending, you can
    # change any object in the file. So we use file:append for
    # write_object:overwrite.
    mode = "w" if wo_mode == "of" else "a"

    if not isinstance(lh5_file, h5py.File):
        lh5_file = h5py.File(lh5_file, mode=mode)

    log.debug(
        f"writing {obj!r}[{start_row}:{n_rows}] as "
        f"{lh5_file.filename}:{group}/{name}[{write_start}:], "
        f"mode = {wo_mode}, h5py_kwargs = {h5py_kwargs}"
    )

    # resolve the group path to an actual h5py group object
    group = utils.get_h5_group(group, lh5_file)

    if wo_mode == "w" and name in group:
        msg = f"can't overwrite '{name}' in wo_mode 'write_safe'"
        raise LH5EncodeError(msg, lh5_file, group, name)

    # struct or table or waveform table
    # (checked first: Table/WaveformTable are presumably Struct subclasses —
    # confirm against lgdo.types)
    if isinstance(obj, types.Struct):
        return _h5_write_struct(
            obj,
            name,
            lh5_file,
            group=group,
            start_row=start_row,
            n_rows=n_rows,
            wo_mode=wo_mode,
            write_start=write_start,
            **h5py_kwargs,
        )

    # scalars
    if isinstance(obj, types.Scalar):
        return _h5_write_scalar(obj, name, lh5_file, group, wo_mode)

    # vector of encoded vectors
    if isinstance(
        obj, (types.VectorOfEncodedVectors, types.ArrayOfEncodedEqualSizedArrays)
    ):
        group = utils.get_h5_group(
            name, group, grp_attrs=obj.attrs, overwrite=(wo_mode == "o")
        )

        # ask not to further compress flattened_data, it is already compressed!
        obj.encoded_data.flattened_data.attrs["compression"] = None

        _h5_write_vector_of_vectors(
            obj.encoded_data,
            "encoded_data",
            lh5_file,
            group=group,
            start_row=start_row,
            n_rows=n_rows,
            wo_mode=wo_mode,
            write_start=write_start,
            **h5py_kwargs,
        )

        # decoded_size is a Scalar for equal-sized arrays, an Array otherwise
        if isinstance(obj.decoded_size, types.Scalar):
            _h5_write_scalar(
                obj.decoded_size,
                "decoded_size",
                lh5_file,
                group=group,
                wo_mode=wo_mode,
            )
        else:
            _h5_write_array(
                obj.decoded_size,
                "decoded_size",
                lh5_file,
                group=group,
                start_row=start_row,
                n_rows=n_rows,
                wo_mode=wo_mode,
                write_start=write_start,
                **h5py_kwargs,
            )

        return None

    # vector of vectors
    if isinstance(obj, types.VectorOfVectors):
        return _h5_write_vector_of_vectors(
            obj,
            name,
            lh5_file,
            group=group,
            start_row=start_row,
            n_rows=n_rows,
            wo_mode=wo_mode,
            write_start=write_start,
            **h5py_kwargs,
        )

    # if we get this far, must be one of the Array types
    if isinstance(obj, types.Array):
        return _h5_write_array(
            obj,
            name,
            lh5_file,
            group=group,
            start_row=start_row,
            n_rows=n_rows,
            wo_mode=wo_mode,
            write_start=write_start,
            **h5py_kwargs,
        )

    msg = f"do not know how to write '{name}' of type '{type(obj).__name__}'"
    raise LH5EncodeError(msg, lh5_file, group, name)
|
160
|
+
|
161
|
+
|
162
|
+
def _h5_write_struct(
    obj,
    name,
    lh5_file,
    group="/",
    start_row=0,
    n_rows=None,
    wo_mode="append",
    write_start=0,
    **h5py_kwargs,
):
    """Write an LGDO :class:`~.types.Struct` (or Table/WaveformTable) as an
    HDF5 group, recursively writing each field via ``_h5_write_lgdo``.

    In ``append_column`` mode (``wo_mode == "ac"``) the on-disk table's
    ``datatype`` attribute is rewritten to include the new fields before the
    fields themselves are written; this requires that no new field name
    collides with an existing one and that table sizes match.

    Raises
    ------
    LH5EncodeError
        on append-column to a non-struct, on a field-name collision, or on
        a table-size mismatch.
    """
    assert isinstance(obj, types.Struct)

    # In order to append a column, we need to update the
    # `table{old_fields}` value in `group.attrs['datatype"]` to include
    # the new fields. One way to do this is to override
    # `obj.attrs["datatype"]` to include old and new fields. Then we
    # can write the fields to the table as normal.
    if wo_mode == "ac":
        old_group = utils.get_h5_group(name, group)
        lgdotype = datatype.datatype(old_group.attrs["datatype"])
        fields = datatype.get_struct_fields(old_group.attrs["datatype"])
        if not issubclass(lgdotype, types.Struct):
            msg = f"Trying to append columns to an object of type {lgdotype.__name__}"
            raise LH5EncodeError(msg, lh5_file, group, name)

        # If the mode is `append_column`, make sure we aren't appending
        # a table that has a column of the same name as in the existing
        # table. Also make sure that the field we are adding has the
        # same size
        if len(list(set(fields).intersection(set(obj.keys())))) != 0:
            msg = (
                f"Can't append {list(set(fields).intersection(set(obj.keys())))} "
                "column(s) to a table with the same field(s)"
            )
            raise LH5EncodeError(msg, lh5_file, group, name)
        # It doesn't matter what key we access, as all fields in the old table have the same size
        if old_group[next(iter(old_group.keys()))].size != obj.size:
            msg = (
                f"Table sizes don't match. Trying to append column of size {obj.size} "
                f"to a table of size {old_group[next(iter(old_group.keys()))].size}."
            )
            raise LH5EncodeError(msg, lh5_file, group, name)

        # Now we can append the obj.keys() to the old fields, and then update obj.attrs.
        # NOTE(review): this mutates the caller's `obj.attrs` in place.
        fields.extend(list(obj.keys()))
        obj.attrs.pop("datatype")
        obj.attrs["datatype"] = "table" + "{" + ",".join(fields) + "}"

    group = utils.get_h5_group(
        name,
        group,
        grp_attrs=obj.attrs,
        overwrite=(wo_mode in ["o", "ac"]),
    )
    # If the mode is overwrite, then we need to peek into the file's
    # table's existing fields. If we are writing a new table to the
    # group that does not contain an old field, we should delete that
    # old field from the file
    if wo_mode == "o":
        # Find the old keys in the group that are not present in the
        # new table's keys, then delete them
        for key in list(set(group.keys()) - set(obj.keys())):
            log.debug(f"{key} is not present in new table, deleting field")
            del group[key]

    for field in obj:
        # eventually compress waveform table values with LGDO's
        # custom codecs before writing
        # if waveformtable.values.attrs["compression"] is NOT a
        # WaveformCodec, just leave it there
        obj_fld = None
        if (
            isinstance(obj, types.WaveformTable)
            and field == "values"
            and not isinstance(obj.values, types.VectorOfEncodedVectors)
            and not isinstance(obj.values, types.ArrayOfEncodedEqualSizedArrays)
            and "compression" in obj.values.attrs
            and isinstance(obj.values.attrs["compression"], compression.WaveformCodec)
        ):
            codec = obj.values.attrs["compression"]
            obj_fld = compression.encode(obj.values, codec=codec)
        else:
            obj_fld = obj[field]

        # Convert keys to string for dataset names
        f = str(field)
        _h5_write_lgdo(
            obj_fld,
            f,
            lh5_file,
            group=group,
            start_row=start_row,
            n_rows=n_rows,
            wo_mode=wo_mode,
            write_start=write_start,
            **h5py_kwargs,
        )
|
@@ -0,0 +1,23 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import logging
|
4
|
+
|
5
|
+
from .... import types
|
6
|
+
from ...exceptions import LH5EncodeError
|
7
|
+
|
8
|
+
log = logging.getLogger(__name__)
|
9
|
+
|
10
|
+
|
11
|
+
def _h5_write_scalar(obj, name, lh5_file, group="/", wo_mode="append"):
    """Write an LGDO Scalar as a zero-dimensional HDF5 dataset ``group[name]``.

    An existing dataset of the same name is deleted first when ``wo_mode``
    is overwrite ("o") or append ("a"); any other mode raises
    :class:`LH5EncodeError` instead of clobbering it. The scalar's ``attrs``
    are copied onto the new dataset.
    """
    assert isinstance(obj, types.Scalar)

    if name in group:
        # refuse to clobber unless the mode explicitly allows it
        if wo_mode not in ["o", "a"]:
            msg = f"tried to overwrite but wo_mode is {wo_mode!r}"
            raise LH5EncodeError(msg, lh5_file, group, name)
        log.debug(f"overwriting {name} in {group}")
        del group[name]

    scalar_ds = group.create_dataset(name, shape=(), data=obj.value)
    scalar_ds.attrs.update(obj.attrs)
|
@@ -0,0 +1,95 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import logging
|
4
|
+
|
5
|
+
from .... import types
|
6
|
+
from ... import utils
|
7
|
+
from ...exceptions import LH5EncodeError
|
8
|
+
from .array import _h5_write_array
|
9
|
+
|
10
|
+
log = logging.getLogger(__name__)
|
11
|
+
|
12
|
+
|
13
|
+
def _h5_write_vector_of_vectors(
    obj,
    name,
    lh5_file,
    group="/",
    start_row=0,
    n_rows=None,
    wo_mode="append",
    write_start=0,
    **h5py_kwargs,
):
    """Write an LGDO :class:`~.types.VectorOfVectors` as an HDF5 group
    holding ``cumulative_length`` and ``flattened_data`` datasets.

    When appending to an existing on-disk object, the on-file offset of the
    last stored ``cumulative_length`` entry is added to the written
    cumulative lengths so they remain globally consistent.

    Parameters
    ----------
    obj
        the VectorOfVectors to write.
    name
        group name under ``group``.
    lh5_file
        file handle, used for error reporting and passed down to the
        element writers.
    group
        parent group (path or h5py group object, resolved via
        ``utils.get_h5_group``).
    start_row, n_rows
        row range of ``obj`` to write.
    wo_mode
        single-letter write mode ("a" append, "o" overwrite, ...).
    write_start
        first on-disk row of ``cumulative_length`` to write to.
    h5py_kwargs
        forwarded to dataset creation.

    Raises
    ------
    LH5EncodeError
        if ``flattened_data`` is of a type that cannot be serialized.
    """
    assert isinstance(obj, types.VectorOfVectors)

    group = utils.get_h5_group(
        name, group, grp_attrs=obj.attrs, overwrite=(wo_mode == "o")
    )
    # clamp n_rows to what is actually available after start_row
    if n_rows is None or n_rows > obj.cumulative_length.nda.shape[0] - start_row:
        n_rows = obj.cumulative_length.nda.shape[0] - start_row

    # if appending we need to add an appropriate offset to the
    # cumulative lengths as appropriate for the in-file object
    offset = 0  # declare here because we have to subtract it off at the end
    if (wo_mode in ("a", "o")) and "cumulative_length" in group:
        len_cl = len(group["cumulative_length"])
        if wo_mode == "a":
            write_start = len_cl
        if len_cl > 0:
            offset = group["cumulative_length"][write_start - 1]

    # First write flattened_data array. Only write rows with data.
    fd_start = 0 if start_row == 0 else obj.cumulative_length.nda[start_row - 1]
    fd_n_rows = (
        obj.cumulative_length.nda[start_row + n_rows - 1] - fd_start
        if len(obj.cumulative_length) > 0
        else 0
    )

    if isinstance(obj.flattened_data, types.Array):
        _func = _h5_write_array
    elif isinstance(obj.flattened_data, types.VectorOfVectors):
        _func = _h5_write_vector_of_vectors
    else:
        # FIX: the second fragment was previously a plain string literal
        # missing the f prefix, so the type name was never interpolated
        msg = (
            "don't know how to serialize to disk flattened_data "
            f"of {type(obj.flattened_data).__name__} type"
        )
        raise LH5EncodeError(msg, lh5_file, group, f"{name}.flattened_data")

    _func(
        obj.flattened_data,
        "flattened_data",
        lh5_file,
        group=group,
        start_row=fd_start,
        n_rows=fd_n_rows,
        wo_mode=wo_mode,
        write_start=offset,
        **h5py_kwargs,
    )

    # now offset is used to give appropriate in-file values for
    # cumulative_length. Need to adjust it for start_row
    if start_row > 0:
        offset -= obj.cumulative_length.nda[start_row - 1]

    # Add offset to obj.cumulative_length itself to avoid memory allocation.
    # Then subtract it off after writing! (otherwise it will be changed
    # upon return)
    cl_dtype = obj.cumulative_length.nda.dtype.type
    obj.cumulative_length.nda += cl_dtype(offset)

    _h5_write_array(
        obj.cumulative_length,
        "cumulative_length",
        lh5_file,
        group=group,
        start_row=start_row,
        n_rows=n_rows,
        wo_mode=wo_mode,
        write_start=write_start,
        **h5py_kwargs,
    )
    # restore the caller's cumulative_length values
    obj.cumulative_length.nda -= cl_dtype(offset)
|