legend-pydataobj 1.11.7__py3-none-any.whl → 1.11.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {legend_pydataobj-1.11.7.dist-info → legend_pydataobj-1.11.8.dist-info}/METADATA +1 -1
- {legend_pydataobj-1.11.7.dist-info → legend_pydataobj-1.11.8.dist-info}/RECORD +25 -26
- {legend_pydataobj-1.11.7.dist-info → legend_pydataobj-1.11.8.dist-info}/WHEEL +1 -1
- lgdo/__init__.py +5 -4
- lgdo/_version.py +2 -2
- lgdo/lh5/__init__.py +1 -3
- lgdo/lh5/_serializers/read/composite.py +1 -3
- lgdo/lh5/_serializers/write/composite.py +14 -8
- lgdo/lh5/concat.py +3 -9
- lgdo/lh5/core.py +33 -36
- lgdo/lh5/iterator.py +48 -27
- lgdo/lh5/store.py +22 -75
- lgdo/lh5/tools.py +0 -111
- lgdo/lh5/utils.py +6 -4
- lgdo/types/array.py +84 -15
- lgdo/types/encoded.py +25 -20
- lgdo/types/histogram.py +1 -1
- lgdo/types/lgdo.py +50 -0
- lgdo/types/table.py +49 -28
- lgdo/types/vectorofvectors.py +132 -94
- lgdo/types/vovutils.py +14 -4
- lgdo/types/waveformtable.py +19 -21
- lgdo/lh5_store.py +0 -284
- {legend_pydataobj-1.11.7.dist-info → legend_pydataobj-1.11.8.dist-info}/entry_points.txt +0 -0
- {legend_pydataobj-1.11.7.dist-info → legend_pydataobj-1.11.8.dist-info}/licenses/LICENSE +0 -0
- {legend_pydataobj-1.11.7.dist-info → legend_pydataobj-1.11.8.dist-info}/top_level.txt +0 -0
{legend_pydataobj-1.11.7.dist-info → legend_pydataobj-1.11.8.dist-info}/RECORD
CHANGED
@@ -1,9 +1,8 @@
-legend_pydataobj-1.11.
-lgdo/__init__.py,sha256=
-lgdo/_version.py,sha256=
+legend_pydataobj-1.11.8.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+lgdo/__init__.py,sha256=fkRv79kdtBasw31gPVK9SdLQ2vEEajTV2t3UPDvFg9o,3206
+lgdo/_version.py,sha256=L3yqa8Aseny4agSopGdx4D8Sk3htDn12nBlc9W8wWes,513
 lgdo/cli.py,sha256=s_EWTBWW76l7zWb6gaTSTjiT-0RzzcYEmjeFEQCVxfk,4647
 lgdo/lgdo_utils.py,sha256=6a2YWEwpyEMXlAyTHZMO01aqxy6SxJzPZkGNWKNWuS0,2567
-lgdo/lh5_store.py,sha256=5BzbJA9sLcqjp8bJDc2olwOiw0VS6rmfg3cfh1kQkRY,8512
 lgdo/logging.py,sha256=82wIOj7l7xr3WYyeHdpSXbbjzHJsy-uRyKYUYx2vMfQ,1003
 lgdo/units.py,sha256=VQYME86_ev9S7Fq8RyCOQNqYr29MphTTYemmEouZafk,161
 lgdo/utils.py,sha256=WRTmXnaQ-h2hVxwJ27qiOigdsD3DHcaDrdDjvupCuZU,3940
@@ -13,19 +12,19 @@ lgdo/compression/generic.py,sha256=tF3UhLJbUDcovLxpIzgQRxFSjZ5Fz3uDRy9kI4mFntQ,2
 lgdo/compression/radware.py,sha256=GcNTtjuyL7VBBqziUBmSqNXuhqy1bJJgvcyvyumPtrc,23839
 lgdo/compression/utils.py,sha256=W2RkBrxPpXlat84dnU9Ad7d_tTws0irtGl7O1dNWjnk,1140
 lgdo/compression/varlen.py,sha256=6ZZUItyoOfygDdE0DyoISeFZfqdbH6xl7T0eclfarzg,15127
-lgdo/lh5/__init__.py,sha256=
-lgdo/lh5/concat.py,sha256=
-lgdo/lh5/core.py,sha256=
+lgdo/lh5/__init__.py,sha256=UTzKGmpgFoHwVB_yNULvJsHD_uQQGl-R87l-3QBkh7w,773
+lgdo/lh5/concat.py,sha256=BZCgK7TWPKK8fMmha8K83d3bC31FVO1b5LOW7x-Ru1s,6186
+lgdo/lh5/core.py,sha256=3o6JsX6aNkMa3plX96a4vG7LWmfco33OuUzV_mMFStQ,13626
 lgdo/lh5/datatype.py,sha256=O_7BqOlX8PFMyG0ppkfUT5aps5HEqX0bpuKcJO3jhu0,1691
 lgdo/lh5/exceptions.py,sha256=3kj8avXl4eBGvebl3LG12gJEmw91W0T8PYR0AfvUAyM,1211
-lgdo/lh5/iterator.py,sha256=
-lgdo/lh5/store.py,sha256=
-lgdo/lh5/tools.py,sha256=
-lgdo/lh5/utils.py,sha256=
+lgdo/lh5/iterator.py,sha256=1ob9B7Bf3ioGCtZkUZoL6ibTxAwLf4ld8_33ghVVEa4,20498
+lgdo/lh5/store.py,sha256=qkBm3gPbr1R2UlQpUuDR5sGRMzpYJBWFL8fDIry6tmQ,8474
+lgdo/lh5/tools.py,sha256=drtJWHY82wCFuFr6LVVnm2AQgs_wZuFmAvyOB4tcOHs,6431
+lgdo/lh5/utils.py,sha256=f2H7H1D-RfDN3g_YrVDQEPaHevn5yDJFA-uznK9cgx8,6336
 lgdo/lh5/_serializers/__init__.py,sha256=eZzxMp1SeZWG0PkEXUiCz3XyprQ8EmelHUmJogC8xYE,1263
 lgdo/lh5/_serializers/read/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lgdo/lh5/_serializers/read/array.py,sha256=uWfMCihfAmW2DE2ewip2qCK_kvQC_mb2zvOv26uzijc,1000
-lgdo/lh5/_serializers/read/composite.py,sha256=
+lgdo/lh5/_serializers/read/composite.py,sha256=UvkZHEhf0V7SFLxzF52eyP68hU0guGOLqosrfmIfeys,11729
 lgdo/lh5/_serializers/read/encoded.py,sha256=Q98c08d8LkZq2AlY4rThYECVaEqwbv4T2Urn7TGnsyE,4130
 lgdo/lh5/_serializers/read/ndarray.py,sha256=lFCXD6bSzmMOH7cVmvRYXakkfMCI8EoqTPNONRJ1F0s,3690
 lgdo/lh5/_serializers/read/scalar.py,sha256=kwhWm1T91pXf86CqtUUD8_qheSR92gXZrQVtssV5YCg,922
@@ -33,24 +32,24 @@ lgdo/lh5/_serializers/read/utils.py,sha256=YfSqPO-83A1XvhhuULxQ0Qz2A5ODa3sb7ApNx
 lgdo/lh5/_serializers/read/vector_of_vectors.py,sha256=765P8mElGArAaEPkHTAUXFQ47t1_3-3BQAete0LckBQ,7207
 lgdo/lh5/_serializers/write/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lgdo/lh5/_serializers/write/array.py,sha256=66DKnW2yqIBlUGNBPWcE-m4W0B2-nTKusDHGX9m6GY0,3223
-lgdo/lh5/_serializers/write/composite.py,sha256=
+lgdo/lh5/_serializers/write/composite.py,sha256=wiq9O3Cb08wrAm14L7Jz69ppL7SnYEDHgW6pJtY8aBI,9425
 lgdo/lh5/_serializers/write/scalar.py,sha256=JPt_fcdTKOSFp5hfJdcKIfK4hxhcD8vhOlvDF-7btQ8,763
 lgdo/lh5/_serializers/write/vector_of_vectors.py,sha256=puGQX9XF5P_5DVbm_Cc6TvPrsDywgBLSYtkqFNltbB4,3493
 lgdo/types/__init__.py,sha256=DNfOErPiAZg-7Gygkp6ZKAi20Yrm1mfderZHvKo1Y4s,821
-lgdo/types/array.py,sha256=
+lgdo/types/array.py,sha256=vxViJScqKw4zGUrrIOuuU_9Y0oTfOkEEhs0TOyUYjwI,9284
 lgdo/types/arrayofequalsizedarrays.py,sha256=DOGJiTmc1QCdm7vLbE6uIRXoMPtt8uuCfmwQawgWf5s,4949
-lgdo/types/encoded.py,sha256=
+lgdo/types/encoded.py,sha256=_e8u_BPfpjJbLnEdyTo9QG3kbNsGj0BN4gjdj3L1ndw,15640
 lgdo/types/fixedsizearray.py,sha256=7RjUwTz1bW0pcrdy27JlfrXPAuOU89Kj7pOuSUCojK8,1527
-lgdo/types/histogram.py,sha256=
-lgdo/types/lgdo.py,sha256=
+lgdo/types/histogram.py,sha256=Jz1lLH56BfYnmcUhxUHK1h2wLDQ0Abgyd-6LznU-3-k,19979
+lgdo/types/lgdo.py,sha256=21YNtJCHnSO3M60rjsAdbMO5crDjL_0BtuFpudZ2xvU,4500
 lgdo/types/scalar.py,sha256=c5Es2vyDqyWTPV6mujzfIzMpC1jNWkEIcvYyWQUxH3Q,1933
 lgdo/types/struct.py,sha256=Q0OWLVd4B0ciLb8t6VsxU3MPbmGLZ7WfQNno1lSQS0Q,4918
-lgdo/types/table.py,sha256=
-lgdo/types/vectorofvectors.py,sha256=
-lgdo/types/vovutils.py,sha256=
-lgdo/types/waveformtable.py,sha256=
-legend_pydataobj-1.11.
-legend_pydataobj-1.11.
-legend_pydataobj-1.11.
-legend_pydataobj-1.11.
-legend_pydataobj-1.11.
+lgdo/types/table.py,sha256=FkWesoEA9bmGGSW8Ewig1Zs77ffUoR_nggfYSmkWpjU,20079
+lgdo/types/vectorofvectors.py,sha256=GbAKV_ehXN4XdWSwnmKS_ErCiudRetcH_3wo7iDrVjw,26854
+lgdo/types/vovutils.py,sha256=LW3ZcwECxVYxxcFadAtY3nnK-9-rk8Xbg_m8hY30lo4,10708
+lgdo/types/waveformtable.py,sha256=9S_NMg894NZTGt2pLuskwH4-zQ5EbLnzWI6FVui6fXE,9827
+legend_pydataobj-1.11.8.dist-info/METADATA,sha256=qFXYWIsv8umqbHxK7ltDFwbPU_z5STG6E5CUAxZk6EY,44443
+legend_pydataobj-1.11.8.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
+legend_pydataobj-1.11.8.dist-info/entry_points.txt,sha256=0KWfnwbuwhNn0vPUqARukjp04Ca6lzfZBSirouRmk7I,76
+legend_pydataobj-1.11.8.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
+legend_pydataobj-1.11.8.dist-info/RECORD,,
lgdo/__init__.py
CHANGED
@@ -45,7 +45,7 @@ browsed easily in python like any `HDF5 <https://www.hdfgroup.org>`_ file using
 from __future__ import annotations
 
 from ._version import version as __version__
-from .
+from .lh5 import LH5Iterator, ls, read, read_as, read_n_rows, show, write
 from .types import (
     LGDO,
     Array,
@@ -69,7 +69,6 @@ __all__ = [
     "FixedSizeArray",
     "Histogram",
     "LH5Iterator",
-    "LH5Store",
     "Scalar",
     "Struct",
     "Table",
@@ -77,8 +76,10 @@ __all__ = [
     "VectorOfVectors",
     "WaveformTable",
     "__version__",
-    "load_dfs",
-    "load_nda",
     "ls",
+    "read",
+    "read_as",
+    "read_n_rows",
     "show",
+    "write",
 ]
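The net effect of these `lgdo/__init__.py` hunks is that the I/O helpers are re-exported at the package root, while `LH5Store`, `load_dfs` and `load_nda` are no longer exported from it. A minimal sketch of the resulting top-level API; the file and group names are hypothetical:

    import numpy as np
    import lgdo

    # build a small Table and round-trip it through an LH5 file
    tbl = lgdo.Table(col_dict={"x": lgdo.Array(np.arange(3))})
    lgdo.write(tbl, "data", "test.lh5", wo_mode="overwrite_file")
    obj = lgdo.read("data", "test.lh5")  # returns the LGDO directly
    lgdo.show("test.lh5")                # print the LH5 object tree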
lgdo/_version.py
CHANGED
lgdo/lh5/__init__.py
CHANGED
@@ -11,7 +11,7 @@ from ._serializers.write.array import DEFAULT_HDF5_SETTINGS
 from .core import read, read_as, write
 from .iterator import LH5Iterator
 from .store import LH5Store
-from .tools import
+from .tools import ls, show
 from .utils import read_n_rows
 
 __all__ = [
@@ -19,8 +19,6 @@ __all__ = [
     "LH5Iterator",
     "LH5Store",
     "concat",
-    "load_dfs",
-    "load_nda",
     "ls",
     "read",
     "read_as",
lgdo/lh5/_serializers/read/composite.py
CHANGED
@@ -353,15 +353,13 @@ def _h5_read_table(
         table = Table(col_dict=col_dict, attrs=attrs)
 
         # set (write) loc to end of tree
-        table.
+        table.resize(do_warn=True)
         return table, n_rows_read
 
     # We have read all fields into the object buffer. Run
     # checks: All columns should be the same size. So update
     # table's size as necessary, warn if any mismatches are found
     obj_buf.resize(do_warn=True)
-    # set (write) loc to end of tree
-    obj_buf.loc = obj_buf_start + n_rows_read
 
     # check attributes
     utils.check_obj_buf_attrs(obj_buf.attrs, attrs, fname, oname)
lgdo/lh5/_serializers/write/composite.py
CHANGED
@@ -1,8 +1,8 @@
 from __future__ import annotations
 
 import logging
-import os
 from inspect import signature
+from pathlib import Path
 
 import h5py
 
@@ -53,7 +53,7 @@ def _h5_write_lgdo(
     # change any object in the file. So we use file:append for
     # write_object:overwrite.
     if not isinstance(lh5_file, h5py.File):
-        mode = "w" if wo_mode == "of" or not
+        mode = "w" if wo_mode == "of" or not Path(lh5_file).exists() else "a"
         lh5_file = h5py.File(lh5_file, mode=mode, **file_kwargs)
 
     log.debug(
@@ -186,19 +186,20 @@ def _h5_write_struct(
     write_start=0,
     **h5py_kwargs,
 ):
+    # this works for structs and derived (tables)
     assert isinstance(obj, types.Struct)
 
     # In order to append a column, we need to update the
-    # `table{old_fields}` value in `group.attrs['datatype"]` to include
+    # `struct/table{old_fields}` value in `group.attrs['datatype"]` to include
     # the new fields. One way to do this is to override
     # `obj.attrs["datatype"]` to include old and new fields. Then we
-    # can write the fields to the table as normal.
+    # can write the fields to the struct/table as normal.
     if wo_mode == "ac":
         old_group = utils.get_h5_group(name, group)
         lgdotype = datatype.datatype(old_group.attrs["datatype"])
         fields = datatype.get_struct_fields(old_group.attrs["datatype"])
-        if not
-            msg = f"Trying to append columns to an object of type {lgdotype.__name__}"
+        if lgdotype is not type(obj):
+            msg = f"Trying to append columns to an object of different type {lgdotype.__name__}!={type(obj)}"
             raise LH5EncodeError(msg, lh5_file, group, name)
 
     # If the mode is `append_column`, make sure we aren't appending
@@ -211,8 +212,12 @@ def _h5_write_struct(
                 "column(s) to a table with the same field(s)"
             )
             raise LH5EncodeError(msg, lh5_file, group, name)
+
         # It doesn't matter what key we access, as all fields in the old table have the same size
-        if
+        if (
+            isinstance(obj, types.Table)
+            and old_group[next(iter(old_group.keys()))].size != obj.size
+        ):
             msg = (
                 f"Table sizes don't match. Trying to append column of size {obj.size} "
                 f"to a table of size {old_group[next(iter(old_group.keys()))].size}."
@@ -222,7 +227,8 @@ def _h5_write_struct(
         # Now we can append the obj.keys() to the old fields, and then update obj.attrs.
         fields.extend(list(obj.keys()))
         obj.attrs.pop("datatype")
-
+
+        obj.attrs["datatype"] = obj.datatype_name() + "{" + ",".join(fields) + "}"
 
         group = utils.get_h5_group(
             name,
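These hunks generalize column appending from tables to any `Struct` and tighten the type and size checks. A sketch of the `append_column` behavior they enforce, with hypothetical file and object names:

    import numpy as np
    import lgdo

    tbl = lgdo.Table(col_dict={"a": lgdo.Array(np.arange(10))})
    lgdo.write(tbl, "tbl", "data.lh5", wo_mode="overwrite_file")

    # appending succeeds only if the on-disk object has the same LGDO type
    # and, for tables, the same number of rows; otherwise LH5EncodeError
    new_col = lgdo.Table(col_dict={"b": lgdo.Array(np.ones(10))})
    lgdo.write(new_col, "tbl", "data.lh5", wo_mode="append_column")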
lgdo/lh5/concat.py
CHANGED
@@ -76,7 +76,7 @@ def _get_lgdos(file, obj_list):
             continue
 
         # read as little as possible
-        obj
+        obj = store.read(current, h5f0, n_rows=1)
         if isinstance(obj, (Table, Array, VectorOfVectors)):
             lgdos.append(current)
 
@@ -139,12 +139,6 @@ def _remove_nested_fields(lgdos: dict, obj_list: list):
         _inplace_table_filter(key, val, obj_list)
 
 
-def _slice(obj, n_rows):
-    ak_obj = obj.view_as("ak")[:n_rows]
-    obj_type = type(obj)
-    return obj_type(ak_obj)
-
-
 def lh5concat(
     lh5_files: list,
     output: str,
@@ -186,8 +180,8 @@ def lh5concat(
     # loop over lgdo objects
     for lgdo in lgdos:
        # iterate over the files
-        for lh5_obj
-            data = {lgdo:
+        for lh5_obj in LH5Iterator(lh5_files, lgdo):
+            data = {lgdo: lh5_obj}
 
             # remove the nested fields
             _remove_nested_fields(data, obj_list)
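With `_slice` removed, `lh5concat` now streams each object through `LH5Iterator` instead of slicing a one-shot read, so memory use is bounded by the iterator buffer. A usage sketch, with hypothetical file names:

    from lgdo.lh5.concat import lh5concat

    # concatenate the shared objects of several LH5 files into one output
    lh5concat(
        lh5_files=["run0.lh5", "run1.lh5"],
        output="merged.lh5",
    )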
lgdo/lh5/core.py
CHANGED
@@ -4,6 +4,7 @@ import bisect
 import inspect
 import sys
 from collections.abc import Mapping, Sequence
+from contextlib import suppress
 from typing import Any
 
 import h5py
@@ -92,8 +93,7 @@ def read(
         will be set to ``True``, while the rest will default to ``False``.
     obj_buf
         Read directly into memory provided in `obj_buf`. Note: the buffer
-        will be
-        buffer length, send in ``n_rows = len(obj_buf)``.
+        will be resized to accommodate the data retrieved.
     obj_buf_start
         Start location in ``obj_buf`` for read. For concatenating data to
         array-like objects.
@@ -106,25 +106,25 @@
 
     Returns
     -------
-
-
-        successfully read out. Essential for arrays when the amount of data
-        is smaller than the object buffer. For scalars and structs
-        `n_rows_read` will be``1``. For tables it is redundant with
-        ``table.loc``. If `obj_buf` is ``None``, only `object` is returned.
+    object
+        the read-out object
     """
     if isinstance(lh5_file, h5py.File):
         lh5_obj = lh5_file[name]
     elif isinstance(lh5_file, str):
         lh5_file = h5py.File(lh5_file, mode="r", locking=locking)
-
+        try:
+            lh5_obj = lh5_file[name]
+        except KeyError as ke:
+            err = f"Object {name} not found in file {lh5_file.filename}"
+            raise KeyError(err) from ke
     else:
-
-
-
-
+        if obj_buf is not None:
+            obj_buf.resize(obj_buf_start)
+        else:
+            obj_buf_start = 0
 
-        for i, h5f in enumerate(
+        for i, h5f in enumerate(lh5_file):
            if (
                 isinstance(idx, (list, tuple))
                 and len(idx) > 0
@@ -146,33 +146,26 @@
                 idx = np.array(idx[0])[n_rows_to_read_i:] - n_rows_i
             else:
                 idx_i = None
-            n_rows_i = n_rows - n_rows_read
 
-
+            obj_buf_start_i = len(obj_buf) if obj_buf else 0
+            n_rows_i = n_rows - (obj_buf_start_i - obj_buf_start)
+
+            obj_buf = read(
                 name,
                 h5f,
-                start_row,
+                start_row if i == 0 else 0,
                 n_rows_i,
                 idx_i,
                 use_h5idx,
                 field_mask,
                 obj_buf,
-
+                obj_buf_start_i,
                 decompress,
             )
-            if isinstance(obj_ret, tuple):
-                obj_buf, n_rows_read_i = obj_ret
-                obj_buf_is_new = True
-            else:
-                obj_buf = obj_ret
-                n_rows_read_i = len(obj_buf)
 
-
-
-
-            start_row = 0
-            obj_buf_start += n_rows_read_i
-        return obj_buf if obj_buf_is_new else (obj_buf, n_rows_read)
+            if obj_buf is None or (len(obj_buf) - obj_buf_start) >= n_rows:
+                return obj_buf
+        return obj_buf
 
     if isinstance(idx, (list, tuple)) and len(idx) > 0 and not np.isscalar(idx[0]):
         idx = idx[0]
@@ -192,8 +185,10 @@
         obj_buf_start=obj_buf_start,
         decompress=decompress,
     )
+    with suppress(AttributeError):
+        obj.resize(obj_buf_start + n_rows_read)
 
-    return obj
+    return obj
 
 
 def write(
@@ -273,11 +268,13 @@
           end of array is the same as ``append``.
         - ``overwrite_file`` or ``of``: delete file if present prior to
          writing to it. `write_start` should be 0 (its ignored).
-        - ``append_column`` or ``ac``: append columns from an
-          :class:`~.lgdo.
-          :class:`~.lgdo.table.Table`
-
-
+        - ``append_column`` or ``ac``: append fields/columns from an
+          :class:`~.lgdo.struct.Struct` `obj` (and derived types such as
+          :class:`~.lgdo.table.Table`) only if there is an existing
+          :class:`~.lgdo.struct.Struct` in the `lh5_file` with the same `name`.
+          If there are matching fields, it errors out. If appending to a
+          ``Table`` and the size of the new column is different from the size
+          of the existing table, it errors out.
     write_start
         row in the output file (if already existing) to start overwriting
         from.
lgdo/lh5/iterator.py
CHANGED
@@ -24,7 +24,8 @@ class LH5Iterator(typing.Iterator):
 
     This can be used as an iterator:
 
-
+
+    >>> for lh5_obj in LH5Iterator(...):
     >>>     # do the thing!
 
     This is intended for if you are reading a large quantity of data. This
@@ -42,6 +43,8 @@ class LH5Iterator(typing.Iterator):
     In addition to accessing requested data via ``lh5_obj``, several
     properties exist to tell you where that data came from:
 
+    - lh5_it.current_i_entry: get the index within the entry list of the
+      first entry that is currently read
     - lh5_it.current_local_entries: get the entry numbers relative to the
       file the data came from
     - lh5_it.current_global_entries: get the entry number relative to the
@@ -49,9 +52,9 @@ class LH5Iterator(typing.Iterator):
     - lh5_it.current_files: get the file name corresponding to each entry
     - lh5_it.current_groups: get the group name corresponding to each entry
 
-    This class can also be used
+    This class can also be used for random access:
 
-    >>> lh5_obj
+    >>> lh5_obj = lh5_it.read(i_entry)
 
     to read the block of entries starting at i_entry. In case of multiple files
     or the use of an event selection, i_entry refers to a global event index
@@ -65,6 +68,8 @@ class LH5Iterator(typing.Iterator):
         base_path: str = "",
         entry_list: list[int] | list[list[int]] | None = None,
         entry_mask: list[bool] | list[list[bool]] | None = None,
+        i_start: int = 0,
+        n_entries: int | None = None,
         field_mask: dict[str, bool] | list[str] | tuple[str] | None = None,
         buffer_len: int = "100*MB",
         file_cache: int = 10,
@@ -89,6 +94,10 @@ class LH5Iterator(typing.Iterator):
         entry_mask
             mask of entries to read. If a list of arrays is provided, expect
             one for each file. Ignore if a selection list is provided.
+        i_start
+            index of first entry to start at when iterating
+        n_entries
+            number of entries to read before terminating iteration
         field_mask
             mask of which fields to read. See :meth:`LH5Store.read` for
             more details.
@@ -183,7 +192,8 @@ class LH5Iterator(typing.Iterator):
             msg = f"can't open any files from {lh5_files}"
             raise RuntimeError(msg)
 
-        self.
+        self.i_start = i_start
+        self.n_entries = n_entries
         self.current_i_entry = 0
         self.next_i_entry = 0
 
@@ -317,14 +327,21 @@ class LH5Iterator(typing.Iterator):
         )
         return self.global_entry_list
 
-    def read(self, i_entry: int) ->
-        "
-
-
-
+    def read(self, i_entry: int, n_entries: int | None = None) -> LGDO:
+        "Read the nextlocal chunk of events, starting at entry."
+        self.lh5_buffer.resize(0)
+
+        if n_entries is None:
+            n_entries = self.buffer_len
+        elif n_entries == 0:
+            return self.lh5_buffer
+        elif n_entries > self.buffer_len:
+            msg = "n_entries cannot be larger than buffer_len"
+            raise ValueError(msg)
 
         # if file hasn't been opened yet, search through files
         # sequentially until we find the right one
+        i_file = np.searchsorted(self.entry_map, i_entry, "right")
         if i_file < len(self.lh5_files) and self.entry_map[i_file] == np.iinfo("q").max:
             while i_file < len(self.lh5_files) and i_entry >= self._get_file_cumentries(
                 i_file
@@ -332,10 +349,10 @@ class LH5Iterator(typing.Iterator):
                 i_file += 1
 
         if i_file == len(self.lh5_files):
-            return
+            return self.lh5_buffer
         local_i_entry = i_entry - self._get_file_cumentries(i_file - 1)
 
-        while self.
+        while len(self.lh5_buffer) < n_entries and i_file < len(self.file_map):
             # Loop through files
             local_idx = self.get_file_entrylist(i_file)
             if local_idx is not None and len(local_idx) == 0:
@@ -344,18 +361,17 @@ class LH5Iterator(typing.Iterator):
                 continue
 
             i_local = local_i_entry if local_idx is None else local_idx[local_i_entry]
-            self.lh5_buffer
+            self.lh5_buffer = self.lh5_st.read(
                 self.groups[i_file],
                 self.lh5_files[i_file],
                 start_row=i_local,
-                n_rows=
+                n_rows=n_entries - len(self.lh5_buffer),
                 idx=local_idx,
                 field_mask=self.field_mask,
                 obj_buf=self.lh5_buffer,
-                obj_buf_start=self.
+                obj_buf_start=len(self.lh5_buffer),
             )
 
-            self.n_rows += n_rows
             i_file += 1
             local_i_entry = 0
 
@@ -364,7 +380,7 @@ class LH5Iterator(typing.Iterator):
         if self.friend is not None:
             self.friend.read(i_entry)
 
-        return
+        return self.lh5_buffer
 
     def reset_field_mask(self, mask):
         """Replaces the field mask of this iterator and any friends with mask"""
@@ -375,7 +391,7 @@
     @property
     def current_local_entries(self) -> NDArray[int]:
         """Return list of local file entries in buffer"""
-        cur_entries = np.zeros(self.
+        cur_entries = np.zeros(len(self.lh5_buffer), dtype="int32")
         i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
         file_start = self._get_file_cumentries(i_file - 1)
         i_local = self.current_i_entry - file_start
@@ -402,7 +418,7 @@
     @property
     def current_global_entries(self) -> NDArray[int]:
         """Return list of local file entries in buffer"""
-        cur_entries = np.zeros(self.
+        cur_entries = np.zeros(len(self.lh5_buffer), dtype="int32")
         i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
         file_start = self._get_file_cumentries(i_file - 1)
         i_local = self.current_i_entry - file_start
@@ -433,7 +449,7 @@
     @property
     def current_files(self) -> NDArray[str]:
         """Return list of file names for entries in buffer"""
-        cur_files = np.zeros(self.
+        cur_files = np.zeros(len(self.lh5_buffer), dtype=object)
         i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
         file_start = self._get_file_cumentries(i_file - 1)
         i_local = self.current_i_entry - file_start
@@ -455,7 +471,7 @@
     @property
     def current_groups(self) -> NDArray[str]:
         """Return list of group names for entries in buffer"""
-        cur_groups = np.zeros(self.
+        cur_groups = np.zeros(len(self.lh5_buffer), dtype=object)
         i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
         file_start = self._get_file_cumentries(i_file - 1)
         i_local = self.current_i_entry - file_start
@@ -485,14 +501,19 @@
     def __iter__(self) -> typing.Iterator:
         """Loop through entries in blocks of size buffer_len."""
         self.current_i_entry = 0
-        self.next_i_entry =
+        self.next_i_entry = self.i_start
         return self
 
     def __next__(self) -> tuple[LGDO, int, int]:
-        """Read next buffer_len entries and return lh5_table
-
-
-
+        """Read next buffer_len entries and return lh5_table and iterator entry."""
+        n_entries = self.n_entries
+        if n_entries is not None:
+            n_entries = min(
+                self.buffer_len, n_entries + self.i_start - self.next_i_entry
+            )
+
+        buf = self.read(self.next_i_entry, n_entries)
+        if len(buf) == 0:
             raise StopIteration
-
+        self.next_i_entry = self.current_i_entry + len(buf)
+        return buf