legend-pydataobj 1.11.5__py3-none-any.whl → 1.11.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {legend_pydataobj-1.11.5.dist-info → legend_pydataobj-1.11.7.dist-info}/METADATA +4 -2
- {legend_pydataobj-1.11.5.dist-info → legend_pydataobj-1.11.7.dist-info}/RECORD +14 -13
- {legend_pydataobj-1.11.5.dist-info → legend_pydataobj-1.11.7.dist-info}/WHEEL +1 -1
- {legend_pydataobj-1.11.5.dist-info → legend_pydataobj-1.11.7.dist-info}/entry_points.txt +1 -1
- lgdo/_version.py +9 -4
- lgdo/cli.py +10 -155
- lgdo/lh5/__init__.py +1 -0
- lgdo/lh5/_serializers/read/utils.py +1 -1
- lgdo/lh5/_serializers/read/vector_of_vectors.py +1 -1
- lgdo/lh5/concat.py +225 -0
- lgdo/types/table.py +31 -19
- lgdo/types/vectorofvectors.py +1 -1
- {legend_pydataobj-1.11.5.dist-info → legend_pydataobj-1.11.7.dist-info/licenses}/LICENSE +0 -0
- {legend_pydataobj-1.11.5.dist-info → legend_pydataobj-1.11.7.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: legend_pydataobj
|
3
|
-
Version: 1.11.5
|
3
|
+
Version: 1.11.7
|
4
4
|
Summary: LEGEND Python Data Objects
|
5
5
|
Author: The LEGEND Collaboration
|
6
6
|
Maintainer: The LEGEND Collaboration
|
@@ -725,6 +725,8 @@ Requires-Dist: pre-commit; extra == "test"
|
|
725
725
|
Requires-Dist: pylegendtestdata; extra == "test"
|
726
726
|
Requires-Dist: pytest>=6.0; extra == "test"
|
727
727
|
Requires-Dist: pytest-cov; extra == "test"
|
728
|
+
Requires-Dist: dbetto; extra == "test"
|
729
|
+
Dynamic: license-file
|
728
730
|
|
729
731
|
# legend-pydataobj
|
730
732
|
|
@@ -1,6 +1,7 @@
|
|
1
|
+
legend_pydataobj-1.11.7.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
1
2
|
lgdo/__init__.py,sha256=QMYK9HhoMi0pbahPN8mPD18gyTxscFgo7QKfCxVhy-0,3196
|
2
|
-
lgdo/_version.py,sha256=
|
3
|
-
lgdo/cli.py,sha256=
|
3
|
+
lgdo/_version.py,sha256=WYo6AtimYOvXEEB_DEJYUqS-yeVHGFoR5t7JM_9dSwo,513
|
4
|
+
lgdo/cli.py,sha256=s_EWTBWW76l7zWb6gaTSTjiT-0RzzcYEmjeFEQCVxfk,4647
|
4
5
|
lgdo/lgdo_utils.py,sha256=6a2YWEwpyEMXlAyTHZMO01aqxy6SxJzPZkGNWKNWuS0,2567
|
5
6
|
lgdo/lh5_store.py,sha256=5BzbJA9sLcqjp8bJDc2olwOiw0VS6rmfg3cfh1kQkRY,8512
|
6
7
|
lgdo/logging.py,sha256=82wIOj7l7xr3WYyeHdpSXbbjzHJsy-uRyKYUYx2vMfQ,1003
|
@@ -12,7 +13,8 @@ lgdo/compression/generic.py,sha256=tF3UhLJbUDcovLxpIzgQRxFSjZ5Fz3uDRy9kI4mFntQ,2
|
|
12
13
|
lgdo/compression/radware.py,sha256=GcNTtjuyL7VBBqziUBmSqNXuhqy1bJJgvcyvyumPtrc,23839
|
13
14
|
lgdo/compression/utils.py,sha256=W2RkBrxPpXlat84dnU9Ad7d_tTws0irtGl7O1dNWjnk,1140
|
14
15
|
lgdo/compression/varlen.py,sha256=6ZZUItyoOfygDdE0DyoISeFZfqdbH6xl7T0eclfarzg,15127
|
15
|
-
lgdo/lh5/__init__.py,sha256=
|
16
|
+
lgdo/lh5/__init__.py,sha256=y1XE_mpFWwamrl7WVjAVSVB25X4PrEfdVXSneSQEmlQ,825
|
17
|
+
lgdo/lh5/concat.py,sha256=5nO7dNSb0UEP9rZiWGTKH5Cfwsm5LSm3tBJM4Kd70u0,6336
|
16
18
|
lgdo/lh5/core.py,sha256=__-A6Abctzfwfo4-xJi68xs2e4vfzONEQTJVrUCOw-I,13922
|
17
19
|
lgdo/lh5/datatype.py,sha256=O_7BqOlX8PFMyG0ppkfUT5aps5HEqX0bpuKcJO3jhu0,1691
|
18
20
|
lgdo/lh5/exceptions.py,sha256=3kj8avXl4eBGvebl3LG12gJEmw91W0T8PYR0AfvUAyM,1211
|
@@ -27,8 +29,8 @@ lgdo/lh5/_serializers/read/composite.py,sha256=yTm5dfTgkIL7eG9iZXxhdiRhG04cQLd_h
|
|
27
29
|
lgdo/lh5/_serializers/read/encoded.py,sha256=Q98c08d8LkZq2AlY4rThYECVaEqwbv4T2Urn7TGnsyE,4130
|
28
30
|
lgdo/lh5/_serializers/read/ndarray.py,sha256=lFCXD6bSzmMOH7cVmvRYXakkfMCI8EoqTPNONRJ1F0s,3690
|
29
31
|
lgdo/lh5/_serializers/read/scalar.py,sha256=kwhWm1T91pXf86CqtUUD8_qheSR92gXZrQVtssV5YCg,922
|
30
|
-
lgdo/lh5/_serializers/read/utils.py,sha256=
|
31
|
-
lgdo/lh5/_serializers/read/vector_of_vectors.py,sha256=
|
32
|
+
lgdo/lh5/_serializers/read/utils.py,sha256=YfSqPO-83A1XvhhuULxQ0Qz2A5ODa3sb7ApNxQVJXd0,7581
|
33
|
+
lgdo/lh5/_serializers/read/vector_of_vectors.py,sha256=765P8mElGArAaEPkHTAUXFQ47t1_3-3BQAete0LckBQ,7207
|
32
34
|
lgdo/lh5/_serializers/write/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
33
35
|
lgdo/lh5/_serializers/write/array.py,sha256=66DKnW2yqIBlUGNBPWcE-m4W0B2-nTKusDHGX9m6GY0,3223
|
34
36
|
lgdo/lh5/_serializers/write/composite.py,sha256=I6lH0nWFIpAfZyG4-0rLxzg3mfazZ_FEhQVp1FZ0aA4,9254
|
@@ -43,13 +45,12 @@ lgdo/types/histogram.py,sha256=y6j2VDuGYYnLy7WI4J90ApS0PAwic4kCpouZPX09Nus,19974
|
|
43
45
|
lgdo/types/lgdo.py,sha256=RQ2P70N7IWMBDnLLuJI3sm6zQTIKyOMSsKZtBNzmE90,2928
|
44
46
|
lgdo/types/scalar.py,sha256=c5Es2vyDqyWTPV6mujzfIzMpC1jNWkEIcvYyWQUxH3Q,1933
|
45
47
|
lgdo/types/struct.py,sha256=Q0OWLVd4B0ciLb8t6VsxU3MPbmGLZ7WfQNno1lSQS0Q,4918
|
46
|
-
lgdo/types/table.py,sha256=
|
47
|
-
lgdo/types/vectorofvectors.py,sha256=
|
48
|
+
lgdo/types/table.py,sha256=VIHQOPXJHJgiCjMMb_p7EdbcCqLFSObHMdHSxC1Dm5Y,19212
|
49
|
+
lgdo/types/vectorofvectors.py,sha256=K8w7CZou857I9YGkeOe2uYB20gbHl4OV9xhnnJPNOjc,24665
|
48
50
|
lgdo/types/vovutils.py,sha256=7BWPP0BSj-92ifbCIUBcfqxG5-TS8uxujTyJJuDFI04,10302
|
49
51
|
lgdo/types/waveformtable.py,sha256=f2tS4f1OEoYaTM5ldCX9zmw8iSISCT3t3wS1SrPdu_o,9901
|
50
|
-
legend_pydataobj-1.11.
|
51
|
-
legend_pydataobj-1.11.
|
52
|
-
legend_pydataobj-1.11.
|
53
|
-
legend_pydataobj-1.11.
|
54
|
-
legend_pydataobj-1.11.
|
55
|
-
legend_pydataobj-1.11.5.dist-info/RECORD,,
|
52
|
+
legend_pydataobj-1.11.7.dist-info/METADATA,sha256=Z0-UFMzWILag78U1HkNpbYwKDb_JZkZ8kZLtW4T8gw0,44443
|
53
|
+
legend_pydataobj-1.11.7.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
54
|
+
legend_pydataobj-1.11.7.dist-info/entry_points.txt,sha256=0KWfnwbuwhNn0vPUqARukjp04Ca6lzfZBSirouRmk7I,76
|
55
|
+
legend_pydataobj-1.11.7.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
|
56
|
+
legend_pydataobj-1.11.7.dist-info/RECORD,,
|
lgdo/_version.py
CHANGED
@@ -1,8 +1,13 @@
|
|
1
|
-
# file generated by setuptools_scm
|
1
|
+
# file generated by setuptools-scm
|
2
2
|
# don't change, don't track in version control
|
3
|
+
|
4
|
+
__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
|
5
|
+
|
3
6
|
TYPE_CHECKING = False
|
4
7
|
if TYPE_CHECKING:
|
5
|
-
from typing import Tuple
|
8
|
+
from typing import Tuple
|
9
|
+
from typing import Union
|
10
|
+
|
6
11
|
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
7
12
|
else:
|
8
13
|
VERSION_TUPLE = object
|
@@ -12,5 +17,5 @@ __version__: str
|
|
12
17
|
__version_tuple__: VERSION_TUPLE
|
13
18
|
version_tuple: VERSION_TUPLE
|
14
19
|
|
15
|
-
__version__ = version = '1.11.5'
|
16
|
-
__version_tuple__ = version_tuple = (1, 11, 5)
|
20
|
+
__version__ = version = '1.11.7'
|
21
|
+
__version_tuple__ = version_tuple = (1, 11, 7)
|
lgdo/cli.py
CHANGED
@@ -3,12 +3,12 @@
|
|
3
3
|
from __future__ import annotations
|
4
4
|
|
5
5
|
import argparse
|
6
|
-
import fnmatch
|
7
6
|
import logging
|
8
7
|
import sys
|
9
8
|
|
10
|
-
from . import Array, Scalar, Struct, Table, VectorOfVectors, __version__, lh5
|
9
|
+
from . import __version__, lh5
|
11
10
|
from . import logging as lgdogging # eheheh
|
11
|
+
from .lh5.concat import lh5concat
|
12
12
|
|
13
13
|
log = logging.getLogger(__name__)
|
14
14
|
|
@@ -80,7 +80,7 @@ def lh5ls(args=None):
|
|
80
80
|
)
|
81
81
|
|
82
82
|
|
83
|
-
def lh5concat(args=None):
|
83
|
+
def lh5concat_cli(args=None):
|
84
84
|
"""Command line interface for concatenating array-like LGDOs in LH5 files."""
|
85
85
|
parser = argparse.ArgumentParser(
|
86
86
|
prog="lh5concat",
|
@@ -174,155 +174,10 @@ Exclude the /data/table1/col1 Table column:
|
|
174
174
|
print(__version__) # noqa: T201
|
175
175
|
sys.exit()
|
176
176
|
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
# let's remove objects with nested LGDOs inside
|
186
|
-
to_remove = set()
|
187
|
-
for name in obj_list_full:
|
188
|
-
if len(fnmatch.filter(obj_list_full, f"{name}/*")) > 1:
|
189
|
-
to_remove.add(name)
|
190
|
-
obj_list_full -= to_remove
|
191
|
-
|
192
|
-
obj_list = set()
|
193
|
-
# now first remove excluded stuff
|
194
|
-
if args.exclude is not None:
|
195
|
-
for exc in args.exclude:
|
196
|
-
obj_list_full -= set(fnmatch.filter(obj_list_full, exc.strip("/")))
|
197
|
-
|
198
|
-
# then make list of included, based on latest list
|
199
|
-
if args.include is not None:
|
200
|
-
for inc in args.include:
|
201
|
-
obj_list |= set(fnmatch.filter(obj_list_full, inc.strip("/")))
|
202
|
-
else:
|
203
|
-
obj_list = obj_list_full
|
204
|
-
|
205
|
-
# sort
|
206
|
-
obj_list = sorted(obj_list)
|
207
|
-
|
208
|
-
msg = f"objects matching include patterns {args.include} in {file0}: {obj_list}"
|
209
|
-
log.debug(msg)
|
210
|
-
|
211
|
-
# 1. read first valid lgdo from left to right
|
212
|
-
store = lh5.LH5Store()
|
213
|
-
h5f0 = store.gimme_file(file0)
|
214
|
-
lgdos = {}
|
215
|
-
lgdo_structs = {}
|
216
|
-
# loop over object list in the first file
|
217
|
-
for name in obj_list:
|
218
|
-
# now loop over groups starting from root
|
219
|
-
current = ""
|
220
|
-
for item in name.split("/"):
|
221
|
-
current = f"{current}/{item}".strip("/")
|
222
|
-
|
223
|
-
if current in lgdos:
|
224
|
-
break
|
225
|
-
|
226
|
-
# not even an LGDO (i.e. a plain HDF5 group)!
|
227
|
-
if "datatype" not in h5f0[current].attrs:
|
228
|
-
continue
|
229
|
-
|
230
|
-
# read as little as possible
|
231
|
-
obj, _ = store.read(current, h5f0, n_rows=1)
|
232
|
-
if isinstance(obj, (Table, Array, VectorOfVectors)):
|
233
|
-
# read all!
|
234
|
-
obj, _ = store.read(current, h5f0)
|
235
|
-
lgdos[current] = obj
|
236
|
-
elif isinstance(obj, Struct):
|
237
|
-
# structs might be used in a "group-like" fashion (i.e. they might only
|
238
|
-
# contain array-like objects).
|
239
|
-
# note: handle after handling tables, as tables also satisfy this check.
|
240
|
-
lgdo_structs[current] = obj.attrs["datatype"]
|
241
|
-
continue
|
242
|
-
elif isinstance(obj, Scalar):
|
243
|
-
msg = f"cannot concat scalar field {current}"
|
244
|
-
log.warning(msg)
|
245
|
-
|
246
|
-
break
|
247
|
-
|
248
|
-
msg = f"first-level, array-like objects: {lgdos.keys()}"
|
249
|
-
log.debug(msg)
|
250
|
-
msg = f"nested structs: {lgdo_structs.keys()}"
|
251
|
-
log.debug(msg)
|
252
|
-
|
253
|
-
h5f0.close()
|
254
|
-
|
255
|
-
if lgdos == {}:
|
256
|
-
msg = "did not find any field to concatenate, exit"
|
257
|
-
log.error(msg)
|
258
|
-
return
|
259
|
-
|
260
|
-
# 2. remove (nested) table fields based on obj_list
|
261
|
-
|
262
|
-
def _inplace_table_filter(name, table, obj_list):
|
263
|
-
# filter objects nested in this LGDO
|
264
|
-
skm = fnmatch.filter(obj_list, f"{name}/*")
|
265
|
-
kept = {it.removeprefix(name).strip("/").split("/")[0] for it in skm}
|
266
|
-
|
267
|
-
# now remove fields
|
268
|
-
for k in list(table.keys()):
|
269
|
-
if k not in kept:
|
270
|
-
table.remove_column(k)
|
271
|
-
|
272
|
-
msg = f"fields left in table '{name}': {table.keys()}"
|
273
|
-
log.debug(msg)
|
274
|
-
|
275
|
-
# recurse!
|
276
|
-
for k2, v2 in table.items():
|
277
|
-
if not isinstance(v2, Table):
|
278
|
-
continue
|
279
|
-
|
280
|
-
_inplace_table_filter(f"{name}/{k2}", v2, obj_list)
|
281
|
-
|
282
|
-
for key, val in lgdos.items():
|
283
|
-
if not isinstance(val, Table):
|
284
|
-
continue
|
285
|
-
|
286
|
-
_inplace_table_filter(key, val, obj_list)
|
287
|
-
|
288
|
-
# 3. write to output file
|
289
|
-
msg = f"creating output file {args.output}"
|
290
|
-
log.info(msg)
|
291
|
-
|
292
|
-
first_done = False
|
293
|
-
for name, obj in lgdos.items():
|
294
|
-
store.write(
|
295
|
-
obj,
|
296
|
-
name,
|
297
|
-
args.output,
|
298
|
-
wo_mode="overwrite_file"
|
299
|
-
if (args.overwrite and not first_done)
|
300
|
-
else "write_safe",
|
301
|
-
)
|
302
|
-
|
303
|
-
first_done = True
|
304
|
-
|
305
|
-
# 4. loop over rest of files/names and write-append
|
306
|
-
|
307
|
-
for file in args.lh5_file[1:]:
|
308
|
-
msg = f"appending file {file} to {args.output}"
|
309
|
-
log.info(msg)
|
310
|
-
|
311
|
-
for name in lgdos:
|
312
|
-
obj, _ = store.read(name, file)
|
313
|
-
# need to remove nested LGDOs from obj too before appending
|
314
|
-
if isinstance(obj, Table):
|
315
|
-
_inplace_table_filter(name, obj, obj_list)
|
316
|
-
|
317
|
-
store.write(obj, name, args.output, wo_mode="append")
|
318
|
-
|
319
|
-
# 5. reset datatypes of the "group-like" structs
|
320
|
-
|
321
|
-
if lgdo_structs != {}:
|
322
|
-
output_file = store.gimme_file(args.output, mode="a")
|
323
|
-
for struct, struct_dtype in lgdo_structs.items():
|
324
|
-
msg = f"reset datatype of struct {struct} to {struct_dtype}"
|
325
|
-
log.debug(msg)
|
326
|
-
|
327
|
-
output_file[struct].attrs["datatype"] = struct_dtype
|
328
|
-
output_file.close()
|
177
|
+
lh5concat(
|
178
|
+
lh5_files=args.lh5_file,
|
179
|
+
overwrite=args.overwrite,
|
180
|
+
output=args.output,
|
181
|
+
include_list=args.include,
|
182
|
+
exclude_list=args.exclude,
|
183
|
+
)
|
lgdo/lh5/__init__.py
CHANGED
@@ -34,7 +34,7 @@ def build_field_mask(field_mask: Mapping[str, bool] | Collection[str]) -> defaul
|
|
34
34
|
default = not field_mask[next(iter(field_mask.keys()))]
|
35
35
|
return defaultdict(lambda: default, field_mask)
|
36
36
|
if isinstance(field_mask, (list, tuple, set)):
|
37
|
-
return defaultdict(bool,
|
37
|
+
return defaultdict(bool, dict.fromkeys(field_mask, True))
|
38
38
|
if isinstance(field_mask, defaultdict):
|
39
39
|
return field_mask
|
40
40
|
msg = "bad field_mask type"
|
@@ -123,7 +123,7 @@ def _h5_read_vector_of_vectors(
|
|
123
123
|
)
|
124
124
|
msg = (
|
125
125
|
f"cumulative_length non-increasing between entries "
|
126
|
-
f"{start_row} and {start_row+n_rows_read}"
|
126
|
+
f"{start_row} and {start_row + n_rows_read}"
|
127
127
|
)
|
128
128
|
raise LH5DecodeError(msg, fname, oname)
|
129
129
|
|
lgdo/lh5/concat.py
ADDED
@@ -0,0 +1,225 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import fnmatch
|
4
|
+
import logging
|
5
|
+
|
6
|
+
from lgdo.lh5 import LH5Iterator
|
7
|
+
|
8
|
+
from .. import Array, Scalar, Struct, Table, VectorOfVectors, lh5
|
9
|
+
|
10
|
+
log = logging.getLogger(__name__)
|
11
|
+
|
12
|
+
|
13
|
+
def _get_obj_list(
|
14
|
+
lh5_files: list, include_list: list | None = None, exclude_list: list | None = None
|
15
|
+
) -> list[str]:
|
16
|
+
"""Extract a list of lh5 objects to concatenate.
|
17
|
+
|
18
|
+
Parameters
|
19
|
+
----------
|
20
|
+
lh5_files
|
21
|
+
list of input files to concatenate.
|
22
|
+
include_list
|
23
|
+
patterns for tables to include.
|
24
|
+
exclude_list
|
25
|
+
patterns for tables to exclude.
|
26
|
+
|
27
|
+
"""
|
28
|
+
file0 = lh5_files[0]
|
29
|
+
obj_list_full = set(lh5.ls(file0, recursive=True))
|
30
|
+
|
31
|
+
# let's remove objects with nested LGDOs inside
|
32
|
+
to_remove = set()
|
33
|
+
for name in obj_list_full:
|
34
|
+
if len(fnmatch.filter(obj_list_full, f"{name}/*")) > 1:
|
35
|
+
to_remove.add(name)
|
36
|
+
obj_list_full -= to_remove
|
37
|
+
|
38
|
+
obj_list = set()
|
39
|
+
# now first remove excluded stuff
|
40
|
+
if exclude_list is not None:
|
41
|
+
for exc in exclude_list:
|
42
|
+
obj_list_full -= set(fnmatch.filter(obj_list_full, exc.strip("/")))
|
43
|
+
|
44
|
+
# then make list of included, based on latest list
|
45
|
+
if include_list is not None:
|
46
|
+
for inc in include_list:
|
47
|
+
obj_list |= set(fnmatch.filter(obj_list_full, inc.strip("/")))
|
48
|
+
else:
|
49
|
+
obj_list = obj_list_full
|
50
|
+
|
51
|
+
# sort
|
52
|
+
return sorted(obj_list)
|
53
|
+
|
54
|
+
|
55
|
+
def _get_lgdos(file, obj_list):
|
56
|
+
"""Get name of LGDO objects."""
|
57
|
+
|
58
|
+
store = lh5.LH5Store()
|
59
|
+
h5f0 = store.gimme_file(file)
|
60
|
+
|
61
|
+
lgdos = []
|
62
|
+
lgdo_structs = {}
|
63
|
+
|
64
|
+
# loop over object list in the first file
|
65
|
+
for name in obj_list:
|
66
|
+
# now loop over groups starting from root
|
67
|
+
current = ""
|
68
|
+
for item in name.split("/"):
|
69
|
+
current = f"{current}/{item}".strip("/")
|
70
|
+
|
71
|
+
if current in lgdos:
|
72
|
+
break
|
73
|
+
|
74
|
+
# not even an LGDO (i.e. a plain HDF5 group)!
|
75
|
+
if "datatype" not in h5f0[current].attrs:
|
76
|
+
continue
|
77
|
+
|
78
|
+
# read as little as possible
|
79
|
+
obj, _ = store.read(current, h5f0, n_rows=1)
|
80
|
+
if isinstance(obj, (Table, Array, VectorOfVectors)):
|
81
|
+
lgdos.append(current)
|
82
|
+
|
83
|
+
elif isinstance(obj, Struct):
|
84
|
+
# structs might be used in a "group-like" fashion (i.e. they might only
|
85
|
+
# contain array-like objects).
|
86
|
+
# note: handle after handling tables, as tables also satisfy this check.
|
87
|
+
lgdo_structs[current] = obj.attrs["datatype"]
|
88
|
+
continue
|
89
|
+
|
90
|
+
elif isinstance(obj, Scalar):
|
91
|
+
msg = f"cannot concat scalar field {current}"
|
92
|
+
log.warning(msg)
|
93
|
+
|
94
|
+
break
|
95
|
+
|
96
|
+
msg = f"first-level, array-like objects: {lgdos}"
|
97
|
+
log.info(msg)
|
98
|
+
|
99
|
+
msg = f"nested structs: {lgdo_structs}"
|
100
|
+
log.info(msg)
|
101
|
+
|
102
|
+
h5f0.close()
|
103
|
+
|
104
|
+
if lgdos == []:
|
105
|
+
msg = "did not find any field to concatenate, exit"
|
106
|
+
raise RuntimeError(msg)
|
107
|
+
|
108
|
+
return lgdos, lgdo_structs
|
109
|
+
|
110
|
+
|
111
|
+
def _inplace_table_filter(name, table, obj_list):
|
112
|
+
"""filter objects nested in this LGDO"""
|
113
|
+
skm = fnmatch.filter(obj_list, f"{name}/*")
|
114
|
+
kept = {it.removeprefix(name).strip("/").split("/")[0] for it in skm}
|
115
|
+
|
116
|
+
# now remove fields
|
117
|
+
for k in list(table.keys()):
|
118
|
+
if k not in kept:
|
119
|
+
table.remove_column(k)
|
120
|
+
|
121
|
+
msg = f"fields left in table '{name}': {table.keys()}"
|
122
|
+
log.debug(msg)
|
123
|
+
|
124
|
+
# recurse!
|
125
|
+
for k2, v2 in table.items():
|
126
|
+
if not isinstance(v2, Table):
|
127
|
+
continue
|
128
|
+
|
129
|
+
_inplace_table_filter(f"{name}/{k2}", v2, obj_list)
|
130
|
+
|
131
|
+
|
132
|
+
def _remove_nested_fields(lgdos: dict, obj_list: list):
|
133
|
+
"""Remove (nested) table fields based on obj_list."""
|
134
|
+
|
135
|
+
for key, val in lgdos.items():
|
136
|
+
if not isinstance(val, Table):
|
137
|
+
continue
|
138
|
+
|
139
|
+
_inplace_table_filter(key, val, obj_list)
|
140
|
+
|
141
|
+
|
142
|
+
def _slice(obj, n_rows):
|
143
|
+
ak_obj = obj.view_as("ak")[:n_rows]
|
144
|
+
obj_type = type(obj)
|
145
|
+
return obj_type(ak_obj)
|
146
|
+
|
147
|
+
|
148
|
+
def lh5concat(
|
149
|
+
lh5_files: list,
|
150
|
+
output: str,
|
151
|
+
overwrite: bool = False,
|
152
|
+
*,
|
153
|
+
include_list: list | None = None,
|
154
|
+
exclude_list: list | None = None,
|
155
|
+
) -> None:
|
156
|
+
"""Concatenate LGDO Arrays, VectorOfVectors and Tables in LH5 files.
|
157
|
+
|
158
|
+
Parameters
|
159
|
+
----------
|
160
|
+
lh5_files
|
161
|
+
list of input files to concatenate.
|
162
|
+
output
|
163
|
+
path to the output file
|
164
|
+
include_list
|
165
|
+
patterns for tables to include.
|
166
|
+
exclude_list
|
167
|
+
patterns for tables to exclude.
|
168
|
+
"""
|
169
|
+
|
170
|
+
if len(lh5_files) < 2:
|
171
|
+
msg = "you must provide at least two input files"
|
172
|
+
raise RuntimeError(msg)
|
173
|
+
|
174
|
+
# determine list of objects by recursively ls'ing first file
|
175
|
+
obj_list = _get_obj_list(
|
176
|
+
lh5_files, include_list=include_list, exclude_list=exclude_list
|
177
|
+
)
|
178
|
+
|
179
|
+
msg = f"objects matching include patterns {include_list} in {lh5_files[0]}: {obj_list}"
|
180
|
+
log.info(msg)
|
181
|
+
|
182
|
+
lgdos, lgdo_structs = _get_lgdos(lh5_files[0], obj_list)
|
183
|
+
first_done = False
|
184
|
+
store = lh5.LH5Store()
|
185
|
+
|
186
|
+
# loop over lgdo objects
|
187
|
+
for lgdo in lgdos:
|
188
|
+
# iterate over the files
|
189
|
+
for lh5_obj, _, n_rows in LH5Iterator(lh5_files, lgdo):
|
190
|
+
data = {lgdo: _slice(lh5_obj, n_rows)}
|
191
|
+
|
192
|
+
# remove the nested fields
|
193
|
+
_remove_nested_fields(data, obj_list)
|
194
|
+
|
195
|
+
if first_done is False:
|
196
|
+
msg = f"creating output file {output}"
|
197
|
+
log.info(msg)
|
198
|
+
|
199
|
+
store.write(
|
200
|
+
data[lgdo],
|
201
|
+
lgdo,
|
202
|
+
output,
|
203
|
+
wo_mode="overwrite_file"
|
204
|
+
if (overwrite and not first_done)
|
205
|
+
else "write_safe",
|
206
|
+
)
|
207
|
+
first_done = True
|
208
|
+
|
209
|
+
else:
|
210
|
+
msg = f"appending to {output}"
|
211
|
+
log.info(msg)
|
212
|
+
|
213
|
+
if isinstance(data[lgdo], Table):
|
214
|
+
_inplace_table_filter(lgdo, data[lgdo], obj_list)
|
215
|
+
|
216
|
+
store.write(data[lgdo], lgdo, output, wo_mode="append")
|
217
|
+
|
218
|
+
if lgdo_structs != {}:
|
219
|
+
output_file = store.gimme_file(output, mode="a")
|
220
|
+
for struct, struct_dtype in lgdo_structs.items():
|
221
|
+
msg = f"reset datatype of struct {struct} to {struct_dtype}"
|
222
|
+
log.debug(msg)
|
223
|
+
|
224
|
+
output_file[struct].attrs["datatype"] = struct_dtype
|
225
|
+
output_file.close()
|
lgdo/types/table.py
CHANGED
@@ -351,31 +351,39 @@ class Table(Struct):
|
|
351
351
|
msg = f"evaluating {expr!r} with locals={(self_unwrap | parameters)} and {has_ak=}"
|
352
352
|
log.debug(msg)
|
353
353
|
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
msg = f"...the result is {out_data!r}"
|
362
|
-
log.debug(msg)
|
363
|
-
|
364
|
-
# need to convert back to LGDO
|
365
|
-
# np.evaluate should always return a numpy thing?
|
366
|
-
if out_data.ndim == 0:
|
367
|
-
return Scalar(out_data.item())
|
368
|
-
if out_data.ndim == 1:
|
369
|
-
return Array(out_data)
|
370
|
-
if out_data.ndim == 2:
|
371
|
-
return ArrayOfEqualSizedArrays(nda=out_data)
|
354
|
+
def _make_lgdo(data):
|
355
|
+
if data.ndim == 0:
|
356
|
+
return Scalar(data.item())
|
357
|
+
if data.ndim == 1:
|
358
|
+
return Array(data)
|
359
|
+
if data.ndim == 2:
|
360
|
+
return ArrayOfEqualSizedArrays(nda=data)
|
372
361
|
|
373
362
|
msg = (
|
374
|
-
f"evaluation resulted in {out_data.ndim}-dimensional data, "
|
363
|
+
f"evaluation resulted in {data.ndim}-dimensional data, "
|
375
364
|
"I don't know which LGDO this corresponds to"
|
376
365
|
)
|
377
366
|
raise RuntimeError(msg)
|
378
367
|
|
368
|
+
# use numexpr if we are only dealing with numpy data types (and no global dictionary)
|
369
|
+
if not has_ak and modules is None:
|
370
|
+
try:
|
371
|
+
out_data = ne.evaluate(
|
372
|
+
expr,
|
373
|
+
local_dict=(self_unwrap | parameters),
|
374
|
+
)
|
375
|
+
|
376
|
+
msg = f"...the result is {out_data!r}"
|
377
|
+
log.debug(msg)
|
378
|
+
|
379
|
+
# need to convert back to LGDO
|
380
|
+
# np.evaluate should always return a numpy thing?
|
381
|
+
return _make_lgdo(out_data)
|
382
|
+
|
383
|
+
except Exception:
|
384
|
+
msg = f"Warning {expr} could not be evaluated with numexpr probably due to some not allowed characters, trying with eval()."
|
385
|
+
log.debug(msg)
|
386
|
+
|
379
387
|
# resort to good ol' eval()
|
380
388
|
globs = {"ak": ak, "np": np}
|
381
389
|
if modules is not None:
|
@@ -392,6 +400,10 @@ class Table(Struct):
|
|
392
400
|
return Array(out_data.to_numpy())
|
393
401
|
return VectorOfVectors(out_data)
|
394
402
|
|
403
|
+
# modules can still produce numpy array
|
404
|
+
if isinstance(out_data, np.ndarray):
|
405
|
+
return _make_lgdo(out_data)
|
406
|
+
|
395
407
|
if np.isscalar(out_data):
|
396
408
|
return Scalar(out_data)
|
397
409
|
|
lgdo/types/vectorofvectors.py
CHANGED
@@ -138,7 +138,7 @@ class VectorOfVectors(LGDO):
|
|
138
138
|
# FIXME: have to copy the buffers, otherwise self will not own the
|
139
139
|
# data and self.resize() will fail. Is it possible to avoid this?
|
140
140
|
flattened_data = np.copy(
|
141
|
-
container.pop(f"node{data.ndim-1}-data", np.empty(0, dtype=dtype))
|
141
|
+
container.pop(f"node{data.ndim - 1}-data", np.empty(0, dtype=dtype))
|
142
142
|
)
|
143
143
|
|
144
144
|
# if user-provided dtype is different than dtype from Awkward, cast
|
File without changes
|
File without changes
|