legend-pydataobj 1.11.6__py3-none-any.whl → 1.12.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: legend_pydataobj
3
- Version: 1.11.6
3
+ Version: 1.12.0a1
4
4
  Summary: LEGEND Python Data Objects
5
5
  Author: The LEGEND Collaboration
6
6
  Maintainer: The LEGEND Collaboration
@@ -726,6 +726,7 @@ Requires-Dist: pylegendtestdata; extra == "test"
726
726
  Requires-Dist: pytest>=6.0; extra == "test"
727
727
  Requires-Dist: pytest-cov; extra == "test"
728
728
  Requires-Dist: dbetto; extra == "test"
729
+ Dynamic: license-file
729
730
 
730
731
  # legend-pydataobj
731
732
 
@@ -1,6 +1,7 @@
1
+ legend_pydataobj-1.12.0a1.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
1
2
  lgdo/__init__.py,sha256=QMYK9HhoMi0pbahPN8mPD18gyTxscFgo7QKfCxVhy-0,3196
2
- lgdo/_version.py,sha256=Uoj5vQkNSraBmH8UOZgP0kMxvcHSlKlg1J7QJpv8_BM,413
3
- lgdo/cli.py,sha256=Qm2EPmoIVxENAR8BeW7oWpTdHT4GbV-owfzM5NkgjvM,9353
3
+ lgdo/_version.py,sha256=kTYHwRhTzZEJHpwJeVgXBi4yFTeQDpnR6MYkvCMA06Q,515
4
+ lgdo/cli.py,sha256=s_EWTBWW76l7zWb6gaTSTjiT-0RzzcYEmjeFEQCVxfk,4647
4
5
  lgdo/lgdo_utils.py,sha256=6a2YWEwpyEMXlAyTHZMO01aqxy6SxJzPZkGNWKNWuS0,2567
5
6
  lgdo/lh5_store.py,sha256=5BzbJA9sLcqjp8bJDc2olwOiw0VS6rmfg3cfh1kQkRY,8512
6
7
  lgdo/logging.py,sha256=82wIOj7l7xr3WYyeHdpSXbbjzHJsy-uRyKYUYx2vMfQ,1003
@@ -12,44 +13,44 @@ lgdo/compression/generic.py,sha256=tF3UhLJbUDcovLxpIzgQRxFSjZ5Fz3uDRy9kI4mFntQ,2
12
13
  lgdo/compression/radware.py,sha256=GcNTtjuyL7VBBqziUBmSqNXuhqy1bJJgvcyvyumPtrc,23839
13
14
  lgdo/compression/utils.py,sha256=W2RkBrxPpXlat84dnU9Ad7d_tTws0irtGl7O1dNWjnk,1140
14
15
  lgdo/compression/varlen.py,sha256=6ZZUItyoOfygDdE0DyoISeFZfqdbH6xl7T0eclfarzg,15127
15
- lgdo/lh5/__init__.py,sha256=FflSA1LF3RTom1YvlGmTleJcl1ytxwez8B_Hn_o10wU,811
16
- lgdo/lh5/core.py,sha256=__-A6Abctzfwfo4-xJi68xs2e4vfzONEQTJVrUCOw-I,13922
16
+ lgdo/lh5/__init__.py,sha256=y1XE_mpFWwamrl7WVjAVSVB25X4PrEfdVXSneSQEmlQ,825
17
+ lgdo/lh5/concat.py,sha256=BZCgK7TWPKK8fMmha8K83d3bC31FVO1b5LOW7x-Ru1s,6186
18
+ lgdo/lh5/core.py,sha256=GjosZGUp4GSO5FtWV9eXUt_6DGU_OwJXODlj5K1j93M,13320
17
19
  lgdo/lh5/datatype.py,sha256=O_7BqOlX8PFMyG0ppkfUT5aps5HEqX0bpuKcJO3jhu0,1691
18
20
  lgdo/lh5/exceptions.py,sha256=3kj8avXl4eBGvebl3LG12gJEmw91W0T8PYR0AfvUAyM,1211
19
- lgdo/lh5/iterator.py,sha256=ZaBBnmuNIjinwO0JUY55wLxX8Om9rVRRzXBC5uHmSKM,19772
20
- lgdo/lh5/store.py,sha256=3wAaQDd1Zmo0_bQ9DbB-FbKS4Uy_Tb642qKHXtZpSw4,10643
21
+ lgdo/lh5/iterator.py,sha256=1ob9B7Bf3ioGCtZkUZoL6ibTxAwLf4ld8_33ghVVEa4,20498
22
+ lgdo/lh5/store.py,sha256=MYbMt-Mc7izELxuyLlSrrYrylCIzxc2CLzZYIVbZ33w,8455
21
23
  lgdo/lh5/tools.py,sha256=T9CgHA8A3_tVBMtiNJ6hATQKhdqI61m3cX4p2wGKc6c,9937
22
24
  lgdo/lh5/utils.py,sha256=ioz8DlyXZsejwnU2qYdIccdHcF12H62jgLkZsiDOLSM,6243
23
25
  lgdo/lh5/_serializers/__init__.py,sha256=eZzxMp1SeZWG0PkEXUiCz3XyprQ8EmelHUmJogC8xYE,1263
24
26
  lgdo/lh5/_serializers/read/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
27
  lgdo/lh5/_serializers/read/array.py,sha256=uWfMCihfAmW2DE2ewip2qCK_kvQC_mb2zvOv26uzijc,1000
26
- lgdo/lh5/_serializers/read/composite.py,sha256=yTm5dfTgkIL7eG9iZXxhdiRhG04cQLd_hybP4wmxCJE,11809
28
+ lgdo/lh5/_serializers/read/composite.py,sha256=UvkZHEhf0V7SFLxzF52eyP68hU0guGOLqosrfmIfeys,11729
27
29
  lgdo/lh5/_serializers/read/encoded.py,sha256=Q98c08d8LkZq2AlY4rThYECVaEqwbv4T2Urn7TGnsyE,4130
28
30
  lgdo/lh5/_serializers/read/ndarray.py,sha256=lFCXD6bSzmMOH7cVmvRYXakkfMCI8EoqTPNONRJ1F0s,3690
29
31
  lgdo/lh5/_serializers/read/scalar.py,sha256=kwhWm1T91pXf86CqtUUD8_qheSR92gXZrQVtssV5YCg,922
30
- lgdo/lh5/_serializers/read/utils.py,sha256=USacxDA0eY-u9lDOZDuJHcScoSVMNeAYljmRvW0T1Jk,7587
31
- lgdo/lh5/_serializers/read/vector_of_vectors.py,sha256=Fqh2gXFqeR2y0ofQn4GoSdSAATPvHiuBzzcgL16e6ss,7205
32
+ lgdo/lh5/_serializers/read/utils.py,sha256=YfSqPO-83A1XvhhuULxQ0Qz2A5ODa3sb7ApNxQVJXd0,7581
33
+ lgdo/lh5/_serializers/read/vector_of_vectors.py,sha256=765P8mElGArAaEPkHTAUXFQ47t1_3-3BQAete0LckBQ,7207
32
34
  lgdo/lh5/_serializers/write/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
35
  lgdo/lh5/_serializers/write/array.py,sha256=66DKnW2yqIBlUGNBPWcE-m4W0B2-nTKusDHGX9m6GY0,3223
34
36
  lgdo/lh5/_serializers/write/composite.py,sha256=I6lH0nWFIpAfZyG4-0rLxzg3mfazZ_FEhQVp1FZ0aA4,9254
35
37
  lgdo/lh5/_serializers/write/scalar.py,sha256=JPt_fcdTKOSFp5hfJdcKIfK4hxhcD8vhOlvDF-7btQ8,763
36
38
  lgdo/lh5/_serializers/write/vector_of_vectors.py,sha256=puGQX9XF5P_5DVbm_Cc6TvPrsDywgBLSYtkqFNltbB4,3493
37
39
  lgdo/types/__init__.py,sha256=DNfOErPiAZg-7Gygkp6ZKAi20Yrm1mfderZHvKo1Y4s,821
38
- lgdo/types/array.py,sha256=sUxh1CNCaefrnybt5qdjmmMpVQa_RqFxUv1tJ_pyBbc,6537
40
+ lgdo/types/array.py,sha256=e3p93yrfzSmyBgWdGqqtETcKpM7_FxENaAErru15rvo,8904
39
41
  lgdo/types/arrayofequalsizedarrays.py,sha256=DOGJiTmc1QCdm7vLbE6uIRXoMPtt8uuCfmwQawgWf5s,4949
40
- lgdo/types/encoded.py,sha256=JW4U5ow7KLMzhKnmhdnxbC3SZJAs4bOEDZWKG4KY1uU,15293
42
+ lgdo/types/encoded.py,sha256=_e8u_BPfpjJbLnEdyTo9QG3kbNsGj0BN4gjdj3L1ndw,15640
41
43
  lgdo/types/fixedsizearray.py,sha256=7RjUwTz1bW0pcrdy27JlfrXPAuOU89Kj7pOuSUCojK8,1527
42
- lgdo/types/histogram.py,sha256=y6j2VDuGYYnLy7WI4J90ApS0PAwic4kCpouZPX09Nus,19974
43
- lgdo/types/lgdo.py,sha256=RQ2P70N7IWMBDnLLuJI3sm6zQTIKyOMSsKZtBNzmE90,2928
44
+ lgdo/types/histogram.py,sha256=Jz1lLH56BfYnmcUhxUHK1h2wLDQ0Abgyd-6LznU-3-k,19979
45
+ lgdo/types/lgdo.py,sha256=21YNtJCHnSO3M60rjsAdbMO5crDjL_0BtuFpudZ2xvU,4500
44
46
  lgdo/types/scalar.py,sha256=c5Es2vyDqyWTPV6mujzfIzMpC1jNWkEIcvYyWQUxH3Q,1933
45
47
  lgdo/types/struct.py,sha256=Q0OWLVd4B0ciLb8t6VsxU3MPbmGLZ7WfQNno1lSQS0Q,4918
46
- lgdo/types/table.py,sha256=VIHQOPXJHJgiCjMMb_p7EdbcCqLFSObHMdHSxC1Dm5Y,19212
47
- lgdo/types/vectorofvectors.py,sha256=Al9FmY44M-vnzhPdQlFOzwm06LNGBI7RSLSdU5pl9us,24663
48
- lgdo/types/vovutils.py,sha256=7BWPP0BSj-92ifbCIUBcfqxG5-TS8uxujTyJJuDFI04,10302
48
+ lgdo/types/table.py,sha256=FkWesoEA9bmGGSW8Ewig1Zs77ffUoR_nggfYSmkWpjU,20079
49
+ lgdo/types/vectorofvectors.py,sha256=CtPR2WDBmJmzzfXwH4aUcNMB5LvTiGWmL_qRbFah3to,24756
50
+ lgdo/types/vovutils.py,sha256=WjvPLEJrRNjktnbyfypfgxZX-K_aOvcwPygfzoknsyA,10701
49
51
  lgdo/types/waveformtable.py,sha256=f2tS4f1OEoYaTM5ldCX9zmw8iSISCT3t3wS1SrPdu_o,9901
50
- legend_pydataobj-1.11.6.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
51
- legend_pydataobj-1.11.6.dist-info/METADATA,sha256=tZZ9ocWZZR9ECF5Hg8DcQCL6uzCthV8L1ApxIbOu6UY,44421
52
- legend_pydataobj-1.11.6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
53
- legend_pydataobj-1.11.6.dist-info/entry_points.txt,sha256=Uu5MTlppBZxB4QGlLv-oX8FqACWjAZDNii__TBDJwLQ,72
54
- legend_pydataobj-1.11.6.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
55
- legend_pydataobj-1.11.6.dist-info/RECORD,,
52
+ legend_pydataobj-1.12.0a1.dist-info/METADATA,sha256=55pMph32j8h4LKGnoVEdvHX27bHr8k__sdT4L9O5dIA,44445
53
+ legend_pydataobj-1.12.0a1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
54
+ legend_pydataobj-1.12.0a1.dist-info/entry_points.txt,sha256=0KWfnwbuwhNn0vPUqARukjp04Ca6lzfZBSirouRmk7I,76
55
+ legend_pydataobj-1.12.0a1.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
56
+ legend_pydataobj-1.12.0a1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.0)
2
+ Generator: setuptools (78.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,3 +1,3 @@
1
1
  [console_scripts]
2
- lh5concat = lgdo.cli:lh5concat
2
+ lh5concat = lgdo.cli:lh5concat_cli
3
3
  lh5ls = lgdo.cli:lh5ls
lgdo/_version.py CHANGED
@@ -1,8 +1,13 @@
1
- # file generated by setuptools_scm
1
+ # file generated by setuptools-scm
2
2
  # don't change, don't track in version control
3
+
4
+ __all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
5
+
3
6
  TYPE_CHECKING = False
4
7
  if TYPE_CHECKING:
5
- from typing import Tuple, Union
8
+ from typing import Tuple
9
+ from typing import Union
10
+
6
11
  VERSION_TUPLE = Tuple[Union[int, str], ...]
7
12
  else:
8
13
  VERSION_TUPLE = object
@@ -12,5 +17,5 @@ __version__: str
12
17
  __version_tuple__: VERSION_TUPLE
13
18
  version_tuple: VERSION_TUPLE
14
19
 
15
- __version__ = version = '1.11.6'
16
- __version_tuple__ = version_tuple = (1, 11, 6)
20
+ __version__ = version = '1.12.0a1'
21
+ __version_tuple__ = version_tuple = (1, 12, 0)
lgdo/cli.py CHANGED
@@ -3,12 +3,12 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import argparse
6
- import fnmatch
7
6
  import logging
8
7
  import sys
9
8
 
10
- from . import Array, Scalar, Struct, Table, VectorOfVectors, __version__, lh5
9
+ from . import __version__, lh5
11
10
  from . import logging as lgdogging # eheheh
11
+ from .lh5.concat import lh5concat
12
12
 
13
13
  log = logging.getLogger(__name__)
14
14
 
@@ -80,7 +80,7 @@ def lh5ls(args=None):
80
80
  )
81
81
 
82
82
 
83
- def lh5concat(args=None):
83
+ def lh5concat_cli(args=None):
84
84
  """Command line interface for concatenating array-like LGDOs in LH5 files."""
85
85
  parser = argparse.ArgumentParser(
86
86
  prog="lh5concat",
@@ -174,155 +174,10 @@ Exclude the /data/table1/col1 Table column:
174
174
  print(__version__) # noqa: T201
175
175
  sys.exit()
176
176
 
177
- if len(args.lh5_file) < 2:
178
- msg = "you must provide at least two input files"
179
- raise RuntimeError(msg)
180
-
181
- # determine list of objects by recursively ls'ing first file
182
- file0 = args.lh5_file[0]
183
- obj_list_full = set(lh5.ls(file0, recursive=True))
184
-
185
- # let's remove objects with nested LGDOs inside
186
- to_remove = set()
187
- for name in obj_list_full:
188
- if len(fnmatch.filter(obj_list_full, f"{name}/*")) > 1:
189
- to_remove.add(name)
190
- obj_list_full -= to_remove
191
-
192
- obj_list = set()
193
- # now first remove excluded stuff
194
- if args.exclude is not None:
195
- for exc in args.exclude:
196
- obj_list_full -= set(fnmatch.filter(obj_list_full, exc.strip("/")))
197
-
198
- # then make list of included, based on latest list
199
- if args.include is not None:
200
- for inc in args.include:
201
- obj_list |= set(fnmatch.filter(obj_list_full, inc.strip("/")))
202
- else:
203
- obj_list = obj_list_full
204
-
205
- # sort
206
- obj_list = sorted(obj_list)
207
-
208
- msg = f"objects matching include patterns {args.include} in {file0}: {obj_list}"
209
- log.debug(msg)
210
-
211
- # 1. read first valid lgdo from left to right
212
- store = lh5.LH5Store()
213
- h5f0 = store.gimme_file(file0)
214
- lgdos = {}
215
- lgdo_structs = {}
216
- # loop over object list in the first file
217
- for name in obj_list:
218
- # now loop over groups starting from root
219
- current = ""
220
- for item in name.split("/"):
221
- current = f"{current}/{item}".strip("/")
222
-
223
- if current in lgdos:
224
- break
225
-
226
- # not even an LGDO (i.e. a plain HDF5 group)!
227
- if "datatype" not in h5f0[current].attrs:
228
- continue
229
-
230
- # read as little as possible
231
- obj, _ = store.read(current, h5f0, n_rows=1)
232
- if isinstance(obj, (Table, Array, VectorOfVectors)):
233
- # read all!
234
- obj, _ = store.read(current, h5f0)
235
- lgdos[current] = obj
236
- elif isinstance(obj, Struct):
237
- # structs might be used in a "group-like" fashion (i.e. they might only
238
- # contain array-like objects).
239
- # note: handle after handling tables, as tables also satisfy this check.
240
- lgdo_structs[current] = obj.attrs["datatype"]
241
- continue
242
- elif isinstance(obj, Scalar):
243
- msg = f"cannot concat scalar field {current}"
244
- log.warning(msg)
245
-
246
- break
247
-
248
- msg = f"first-level, array-like objects: {lgdos.keys()}"
249
- log.debug(msg)
250
- msg = f"nested structs: {lgdo_structs.keys()}"
251
- log.debug(msg)
252
-
253
- h5f0.close()
254
-
255
- if lgdos == {}:
256
- msg = "did not find any field to concatenate, exit"
257
- log.error(msg)
258
- return
259
-
260
- # 2. remove (nested) table fields based on obj_list
261
-
262
- def _inplace_table_filter(name, table, obj_list):
263
- # filter objects nested in this LGDO
264
- skm = fnmatch.filter(obj_list, f"{name}/*")
265
- kept = {it.removeprefix(name).strip("/").split("/")[0] for it in skm}
266
-
267
- # now remove fields
268
- for k in list(table.keys()):
269
- if k not in kept:
270
- table.remove_column(k)
271
-
272
- msg = f"fields left in table '{name}': {table.keys()}"
273
- log.debug(msg)
274
-
275
- # recurse!
276
- for k2, v2 in table.items():
277
- if not isinstance(v2, Table):
278
- continue
279
-
280
- _inplace_table_filter(f"{name}/{k2}", v2, obj_list)
281
-
282
- for key, val in lgdos.items():
283
- if not isinstance(val, Table):
284
- continue
285
-
286
- _inplace_table_filter(key, val, obj_list)
287
-
288
- # 3. write to output file
289
- msg = f"creating output file {args.output}"
290
- log.info(msg)
291
-
292
- first_done = False
293
- for name, obj in lgdos.items():
294
- store.write(
295
- obj,
296
- name,
297
- args.output,
298
- wo_mode="overwrite_file"
299
- if (args.overwrite and not first_done)
300
- else "write_safe",
301
- )
302
-
303
- first_done = True
304
-
305
- # 4. loop over rest of files/names and write-append
306
-
307
- for file in args.lh5_file[1:]:
308
- msg = f"appending file {file} to {args.output}"
309
- log.info(msg)
310
-
311
- for name in lgdos:
312
- obj, _ = store.read(name, file)
313
- # need to remove nested LGDOs from obj too before appending
314
- if isinstance(obj, Table):
315
- _inplace_table_filter(name, obj, obj_list)
316
-
317
- store.write(obj, name, args.output, wo_mode="append")
318
-
319
- # 5. reset datatypes of the "group-like" structs
320
-
321
- if lgdo_structs != {}:
322
- output_file = store.gimme_file(args.output, mode="a")
323
- for struct, struct_dtype in lgdo_structs.items():
324
- msg = f"reset datatype of struct {struct} to {struct_dtype}"
325
- log.debug(msg)
326
-
327
- output_file[struct].attrs["datatype"] = struct_dtype
328
- output_file.close()
177
+ lh5concat(
178
+ lh5_files=args.lh5_file,
179
+ overwrite=args.overwrite,
180
+ output=args.output,
181
+ include_list=args.include,
182
+ exclude_list=args.exclude,
183
+ )
lgdo/lh5/__init__.py CHANGED
@@ -18,6 +18,7 @@ __all__ = [
18
18
  "DEFAULT_HDF5_SETTINGS",
19
19
  "LH5Iterator",
20
20
  "LH5Store",
21
+ "concat",
21
22
  "load_dfs",
22
23
  "load_nda",
23
24
  "ls",
@@ -353,15 +353,13 @@ def _h5_read_table(
353
353
  table = Table(col_dict=col_dict, attrs=attrs)
354
354
 
355
355
  # set (write) loc to end of tree
356
- table.loc = n_rows_read
356
+ table.resize(do_warn=True)
357
357
  return table, n_rows_read
358
358
 
359
359
  # We have read all fields into the object buffer. Run
360
360
  # checks: All columns should be the same size. So update
361
361
  # table's size as necessary, warn if any mismatches are found
362
362
  obj_buf.resize(do_warn=True)
363
- # set (write) loc to end of tree
364
- obj_buf.loc = obj_buf_start + n_rows_read
365
363
 
366
364
  # check attributes
367
365
  utils.check_obj_buf_attrs(obj_buf.attrs, attrs, fname, oname)
@@ -34,7 +34,7 @@ def build_field_mask(field_mask: Mapping[str, bool] | Collection[str]) -> defaul
34
34
  default = not field_mask[next(iter(field_mask.keys()))]
35
35
  return defaultdict(lambda: default, field_mask)
36
36
  if isinstance(field_mask, (list, tuple, set)):
37
- return defaultdict(bool, {field: True for field in field_mask})
37
+ return defaultdict(bool, dict.fromkeys(field_mask, True))
38
38
  if isinstance(field_mask, defaultdict):
39
39
  return field_mask
40
40
  msg = "bad field_mask type"
@@ -123,7 +123,7 @@ def _h5_read_vector_of_vectors(
123
123
  )
124
124
  msg = (
125
125
  f"cumulative_length non-increasing between entries "
126
- f"{start_row} and {start_row+n_rows_read}"
126
+ f"{start_row} and {start_row + n_rows_read}"
127
127
  )
128
128
  raise LH5DecodeError(msg, fname, oname)
129
129
 
lgdo/lh5/concat.py ADDED
@@ -0,0 +1,219 @@
1
+ from __future__ import annotations
2
+
3
+ import fnmatch
4
+ import logging
5
+
6
+ from lgdo.lh5 import LH5Iterator
7
+
8
+ from .. import Array, Scalar, Struct, Table, VectorOfVectors, lh5
9
+
10
+ log = logging.getLogger(__name__)
11
+
12
+
13
def _get_obj_list(
    lh5_files: list, include_list: list | None = None, exclude_list: list | None = None
) -> list[str]:
    """Build the sorted list of object names selected for concatenation.

    Only the first entry of `lh5_files` is listed (recursively); the
    exclude patterns are applied before the include patterns.

    Parameters
    ----------
    lh5_files
        list of input files to concatenate.
    include_list
        patterns for tables to include.
    exclude_list
        patterns for tables to exclude.

    Returns
    -------
    obj_list
        sorted names of the selected objects.
    """
    first_file = lh5_files[0]
    candidates = set(lh5.ls(first_file, recursive=True))

    # discard every name that has more than one object nested below it
    containers = {
        path for path in candidates if len(fnmatch.filter(candidates, f"{path}/*")) > 1
    }
    candidates -= containers

    # exclusion patterns come first...
    if exclude_list is not None:
        for pattern in exclude_list:
            candidates.difference_update(
                fnmatch.filter(candidates, pattern.strip("/"))
            )

    # ...and without include patterns everything that is left gets selected
    if include_list is None:
        return sorted(candidates)

    # include patterns are matched against what survived the exclusion
    selected = set()
    for pattern in include_list:
        selected.update(fnmatch.filter(candidates, pattern.strip("/")))

    return sorted(selected)
53
+
54
+
55
def _get_lgdos(file, obj_list):
    """Find the array-like LGDOs and the structs reachable from `obj_list`.

    Every name in `obj_list` is walked from the root down, one path
    component at a time, reading a single row of each LGDO met on the way.

    Parameters
    ----------
    file
        file to inspect.
    obj_list
        names of the objects selected for concatenation.

    Returns
    -------
    lgdos
        names of the Table, Array or VectorOfVectors objects found.
    lgdo_structs
        value of the ``datatype`` attribute of each Struct met while
        walking, keyed by the struct name.

    Raises
    ------
    RuntimeError
        if no array-like object is found.
    """
    store = lh5.LH5Store()
    h5f0 = store.gimme_file(file)

    lgdos = []
    lgdo_structs = {}

    # make sure the file is closed even if a lookup or a read fails half-way
    try:
        # loop over object list in the first file
        for name in obj_list:
            # now loop over groups starting from root
            current = ""
            for item in name.split("/"):
                current = f"{current}/{item}".strip("/")

                # already registered through another name: nothing left to do
                if current in lgdos:
                    break

                # not even an LGDO (i.e. a plain HDF5 group)!
                if "datatype" not in h5f0[current].attrs:
                    continue

                # read as little as possible
                obj = store.read(current, h5f0, n_rows=1)
                if isinstance(obj, (Table, Array, VectorOfVectors)):
                    lgdos.append(current)

                elif isinstance(obj, Struct):
                    # structs might be used in a "group-like" fashion (i.e. they
                    # might only contain array-like objects).
                    # note: handle after handling tables, as tables also satisfy
                    # this check.
                    lgdo_structs[current] = obj.attrs["datatype"]
                    continue

                elif isinstance(obj, Scalar):
                    msg = f"cannot concat scalar field {current}"
                    log.warning(msg)

                # anything but a struct ends the descent along this name
                break
    finally:
        h5f0.close()

    msg = f"first-level, array-like objects: {lgdos}"
    log.info(msg)

    msg = f"nested structs: {lgdo_structs}"
    log.info(msg)

    if not lgdos:
        msg = "did not find any field to concatenate, exit"
        raise RuntimeError(msg)

    return lgdos, lgdo_structs
109
+
110
+
111
def _inplace_table_filter(name, table, obj_list):
    """Drop from `table`, in place, the columns that `obj_list` does not name.

    A column is kept when at least one entry of `obj_list` matching
    ``{name}/*`` has it as its first path component after `name`. Columns
    that are themselves tables are filtered recursively.
    """
    # first path component, relative to this table, of every selected object
    kept = set()
    for path in fnmatch.filter(obj_list, f"{name}/*"):
        relative = path.removeprefix(name).strip("/")
        kept.add(relative.split("/")[0])

    # collect the unwanted columns before touching the table
    unwanted = [col for col in table.keys() if col not in kept]
    for col in unwanted:
        table.remove_column(col)

    msg = f"fields left in table '{name}': {table.keys()}"
    log.debug(msg)

    # descend into the nested tables that survived
    for col, child in table.items():
        if isinstance(child, Table):
            _inplace_table_filter(f"{name}/{col}", child, obj_list)
130
+
131
+
132
def _remove_nested_fields(lgdos: dict, obj_list: list):
    """Filter, in place, every Table among the values of `lgdos`.

    Parameters
    ----------
    lgdos
        objects to filter, keyed by their name.
    obj_list
        names of the objects selected for concatenation.
    """
    for name, obj in lgdos.items():
        # only tables have columns to remove
        if isinstance(obj, Table):
            _inplace_table_filter(name, obj, obj_list)
140
+
141
+
142
+ def lh5concat(
143
+ lh5_files: list,
144
+ output: str,
145
+ overwrite: bool = False,
146
+ *,
147
+ include_list: list | None = None,
148
+ exclude_list: list | None = None,
149
+ ) -> None:
150
+ """Concatenate LGDO Arrays, VectorOfVectors and Tables in LH5 files.
151
+
152
+ Parameters
153
+ ----------
154
+ lh5_files
155
+ list of input files to concatenate.
156
+ output
157
+ path to the output file
158
+ include_list
159
+ patterns for tables to include.
160
+ exclude_list
161
+ patterns for tables to exclude.
162
+ """
163
+
164
+ if len(lh5_files) < 2:
165
+ msg = "you must provide at least two input files"
166
+ raise RuntimeError(msg)
167
+
168
+ # determine list of objects by recursively ls'ing first file
169
+ obj_list = _get_obj_list(
170
+ lh5_files, include_list=include_list, exclude_list=exclude_list
171
+ )
172
+
173
+ msg = f"objects matching include patterns {include_list} in {lh5_files[0]}: {obj_list}"
174
+ log.info(msg)
175
+
176
+ lgdos, lgdo_structs = _get_lgdos(lh5_files[0], obj_list)
177
+ first_done = False
178
+ store = lh5.LH5Store()
179
+
180
+ # loop over lgdo objects
181
+ for lgdo in lgdos:
182
+ # iterate over the files
183
+ for lh5_obj in LH5Iterator(lh5_files, lgdo):
184
+ data = {lgdo: lh5_obj}
185
+
186
+ # remove the nested fields
187
+ _remove_nested_fields(data, obj_list)
188
+
189
+ if first_done is False:
190
+ msg = f"creating output file {output}"
191
+ log.info(msg)
192
+
193
+ store.write(
194
+ data[lgdo],
195
+ lgdo,
196
+ output,
197
+ wo_mode="overwrite_file"
198
+ if (overwrite and not first_done)
199
+ else "write_safe",
200
+ )
201
+ first_done = True
202
+
203
+ else:
204
+ msg = f"appending to {output}"
205
+ log.info(msg)
206
+
207
+ if isinstance(data[lgdo], Table):
208
+ _inplace_table_filter(lgdo, data[lgdo], obj_list)
209
+
210
+ store.write(data[lgdo], lgdo, output, wo_mode="append")
211
+
212
+ if lgdo_structs != {}:
213
+ output_file = store.gimme_file(output, mode="a")
214
+ for struct, struct_dtype in lgdo_structs.items():
215
+ msg = f"reset datatype of struct {struct} to {struct_dtype}"
216
+ log.debug(msg)
217
+
218
+ output_file[struct].attrs["datatype"] = struct_dtype
219
+ output_file.close()