legend-pydataobj 1.11.8__py3-none-any.whl → 1.11.10__py3-none-any.whl
This diff shows the changes between two publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- {legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/METADATA +1 -1
- {legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/RECORD +26 -25
- lgdo/__init__.py +4 -5
- lgdo/_version.py +2 -2
- lgdo/lh5/__init__.py +3 -1
- lgdo/lh5/_serializers/read/composite.py +3 -1
- lgdo/lh5/_serializers/write/composite.py +28 -11
- lgdo/lh5/concat.py +9 -3
- lgdo/lh5/core.py +31 -26
- lgdo/lh5/iterator.py +27 -48
- lgdo/lh5/store.py +75 -22
- lgdo/lh5/tools.py +111 -0
- lgdo/lh5/utils.py +4 -6
- lgdo/lh5_store.py +284 -0
- lgdo/types/array.py +15 -84
- lgdo/types/encoded.py +20 -25
- lgdo/types/histogram.py +1 -1
- lgdo/types/lgdo.py +0 -50
- lgdo/types/table.py +28 -49
- lgdo/types/vectorofvectors.py +94 -132
- lgdo/types/vovutils.py +4 -14
- lgdo/types/waveformtable.py +21 -19
- {legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/WHEEL +0 -0
- {legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/entry_points.txt +0 -0
- {legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/licenses/LICENSE +0 -0
- {legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/top_level.txt +0 -0
{legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/RECORD
CHANGED
@@ -1,8 +1,9 @@
-legend_pydataobj-1.11.
-lgdo/__init__.py,sha256=
-lgdo/_version.py,sha256=
+legend_pydataobj-1.11.10.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+lgdo/__init__.py,sha256=QMYK9HhoMi0pbahPN8mPD18gyTxscFgo7QKfCxVhy-0,3196
+lgdo/_version.py,sha256=8C1gggsLVh7AtWQynH0LD96q9_ChbquuufT1aPfdUFU,515
 lgdo/cli.py,sha256=s_EWTBWW76l7zWb6gaTSTjiT-0RzzcYEmjeFEQCVxfk,4647
 lgdo/lgdo_utils.py,sha256=6a2YWEwpyEMXlAyTHZMO01aqxy6SxJzPZkGNWKNWuS0,2567
+lgdo/lh5_store.py,sha256=5BzbJA9sLcqjp8bJDc2olwOiw0VS6rmfg3cfh1kQkRY,8512
 lgdo/logging.py,sha256=82wIOj7l7xr3WYyeHdpSXbbjzHJsy-uRyKYUYx2vMfQ,1003
 lgdo/units.py,sha256=VQYME86_ev9S7Fq8RyCOQNqYr29MphTTYemmEouZafk,161
 lgdo/utils.py,sha256=WRTmXnaQ-h2hVxwJ27qiOigdsD3DHcaDrdDjvupCuZU,3940
@@ -12,19 +13,19 @@ lgdo/compression/generic.py,sha256=tF3UhLJbUDcovLxpIzgQRxFSjZ5Fz3uDRy9kI4mFntQ,2
 lgdo/compression/radware.py,sha256=GcNTtjuyL7VBBqziUBmSqNXuhqy1bJJgvcyvyumPtrc,23839
 lgdo/compression/utils.py,sha256=W2RkBrxPpXlat84dnU9Ad7d_tTws0irtGl7O1dNWjnk,1140
 lgdo/compression/varlen.py,sha256=6ZZUItyoOfygDdE0DyoISeFZfqdbH6xl7T0eclfarzg,15127
-lgdo/lh5/__init__.py,sha256=
-lgdo/lh5/concat.py,sha256=
-lgdo/lh5/core.py,sha256=
+lgdo/lh5/__init__.py,sha256=y1XE_mpFWwamrl7WVjAVSVB25X4PrEfdVXSneSQEmlQ,825
+lgdo/lh5/concat.py,sha256=5nO7dNSb0UEP9rZiWGTKH5Cfwsm5LSm3tBJM4Kd70u0,6336
+lgdo/lh5/core.py,sha256=HT50rolOtTijgaGFskRgzoRbC0w-kxrRS2v9O5Q9Ugo,14067
 lgdo/lh5/datatype.py,sha256=O_7BqOlX8PFMyG0ppkfUT5aps5HEqX0bpuKcJO3jhu0,1691
 lgdo/lh5/exceptions.py,sha256=3kj8avXl4eBGvebl3LG12gJEmw91W0T8PYR0AfvUAyM,1211
-lgdo/lh5/iterator.py,sha256=
-lgdo/lh5/store.py,sha256=
-lgdo/lh5/tools.py,sha256=
-lgdo/lh5/utils.py,sha256=
+lgdo/lh5/iterator.py,sha256=ZaBBnmuNIjinwO0JUY55wLxX8Om9rVRRzXBC5uHmSKM,19772
+lgdo/lh5/store.py,sha256=3wAaQDd1Zmo0_bQ9DbB-FbKS4Uy_Tb642qKHXtZpSw4,10643
+lgdo/lh5/tools.py,sha256=T9CgHA8A3_tVBMtiNJ6hATQKhdqI61m3cX4p2wGKc6c,9937
+lgdo/lh5/utils.py,sha256=ioz8DlyXZsejwnU2qYdIccdHcF12H62jgLkZsiDOLSM,6243
 lgdo/lh5/_serializers/__init__.py,sha256=eZzxMp1SeZWG0PkEXUiCz3XyprQ8EmelHUmJogC8xYE,1263
 lgdo/lh5/_serializers/read/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lgdo/lh5/_serializers/read/array.py,sha256=uWfMCihfAmW2DE2ewip2qCK_kvQC_mb2zvOv26uzijc,1000
-lgdo/lh5/_serializers/read/composite.py,sha256=
+lgdo/lh5/_serializers/read/composite.py,sha256=yTm5dfTgkIL7eG9iZXxhdiRhG04cQLd_hybP4wmxCJE,11809
 lgdo/lh5/_serializers/read/encoded.py,sha256=Q98c08d8LkZq2AlY4rThYECVaEqwbv4T2Urn7TGnsyE,4130
 lgdo/lh5/_serializers/read/ndarray.py,sha256=lFCXD6bSzmMOH7cVmvRYXakkfMCI8EoqTPNONRJ1F0s,3690
 lgdo/lh5/_serializers/read/scalar.py,sha256=kwhWm1T91pXf86CqtUUD8_qheSR92gXZrQVtssV5YCg,922
@@ -32,24 +33,24 @@ lgdo/lh5/_serializers/read/utils.py,sha256=YfSqPO-83A1XvhhuULxQ0Qz2A5ODa3sb7ApNx
 lgdo/lh5/_serializers/read/vector_of_vectors.py,sha256=765P8mElGArAaEPkHTAUXFQ47t1_3-3BQAete0LckBQ,7207
 lgdo/lh5/_serializers/write/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lgdo/lh5/_serializers/write/array.py,sha256=66DKnW2yqIBlUGNBPWcE-m4W0B2-nTKusDHGX9m6GY0,3223
-lgdo/lh5/_serializers/write/composite.py,sha256=
+lgdo/lh5/_serializers/write/composite.py,sha256=eEfisBAxpF1Q8v4AbORbBQyxg0p5ugMo9cBjicOC5KI,9979
 lgdo/lh5/_serializers/write/scalar.py,sha256=JPt_fcdTKOSFp5hfJdcKIfK4hxhcD8vhOlvDF-7btQ8,763
 lgdo/lh5/_serializers/write/vector_of_vectors.py,sha256=puGQX9XF5P_5DVbm_Cc6TvPrsDywgBLSYtkqFNltbB4,3493
 lgdo/types/__init__.py,sha256=DNfOErPiAZg-7Gygkp6ZKAi20Yrm1mfderZHvKo1Y4s,821
-lgdo/types/array.py,sha256=
+lgdo/types/array.py,sha256=sUxh1CNCaefrnybt5qdjmmMpVQa_RqFxUv1tJ_pyBbc,6537
 lgdo/types/arrayofequalsizedarrays.py,sha256=DOGJiTmc1QCdm7vLbE6uIRXoMPtt8uuCfmwQawgWf5s,4949
-lgdo/types/encoded.py,sha256=
+lgdo/types/encoded.py,sha256=JW4U5ow7KLMzhKnmhdnxbC3SZJAs4bOEDZWKG4KY1uU,15293
 lgdo/types/fixedsizearray.py,sha256=7RjUwTz1bW0pcrdy27JlfrXPAuOU89Kj7pOuSUCojK8,1527
-lgdo/types/histogram.py,sha256=
-lgdo/types/lgdo.py,sha256=
+lgdo/types/histogram.py,sha256=y6j2VDuGYYnLy7WI4J90ApS0PAwic4kCpouZPX09Nus,19974
+lgdo/types/lgdo.py,sha256=RQ2P70N7IWMBDnLLuJI3sm6zQTIKyOMSsKZtBNzmE90,2928
 lgdo/types/scalar.py,sha256=c5Es2vyDqyWTPV6mujzfIzMpC1jNWkEIcvYyWQUxH3Q,1933
 lgdo/types/struct.py,sha256=Q0OWLVd4B0ciLb8t6VsxU3MPbmGLZ7WfQNno1lSQS0Q,4918
-lgdo/types/table.py,sha256=
-lgdo/types/vectorofvectors.py,sha256=
-lgdo/types/vovutils.py,sha256=
-lgdo/types/waveformtable.py,sha256=
-legend_pydataobj-1.11.
-legend_pydataobj-1.11.
-legend_pydataobj-1.11.
-legend_pydataobj-1.11.
-legend_pydataobj-1.11.
+lgdo/types/table.py,sha256=VIHQOPXJHJgiCjMMb_p7EdbcCqLFSObHMdHSxC1Dm5Y,19212
+lgdo/types/vectorofvectors.py,sha256=K8w7CZou857I9YGkeOe2uYB20gbHl4OV9xhnnJPNOjc,24665
+lgdo/types/vovutils.py,sha256=7BWPP0BSj-92ifbCIUBcfqxG5-TS8uxujTyJJuDFI04,10302
+lgdo/types/waveformtable.py,sha256=f2tS4f1OEoYaTM5ldCX9zmw8iSISCT3t3wS1SrPdu_o,9901
+legend_pydataobj-1.11.10.dist-info/METADATA,sha256=oWYdBOz-guRsPbd6918FxEhKM2C_8sQrhSqpp4AlL30,44444
+legend_pydataobj-1.11.10.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
+legend_pydataobj-1.11.10.dist-info/entry_points.txt,sha256=0KWfnwbuwhNn0vPUqARukjp04Ca6lzfZBSirouRmk7I,76
+legend_pydataobj-1.11.10.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
+legend_pydataobj-1.11.10.dist-info/RECORD,,
lgdo/__init__.py
CHANGED
@@ -45,7 +45,7 @@ browsed easily in python like any `HDF5 <https://www.hdfgroup.org>`_ file using
 from __future__ import annotations
 
 from ._version import version as __version__
-from .
+from .lh5_store import LH5Iterator, LH5Store, load_dfs, load_nda, ls, show
 from .types import (
     LGDO,
     Array,
@@ -69,6 +69,7 @@ __all__ = [
     "FixedSizeArray",
     "Histogram",
     "LH5Iterator",
+    "LH5Store",
     "Scalar",
     "Struct",
     "Table",
@@ -76,10 +77,8 @@ __all__ = [
     "VectorOfVectors",
     "WaveformTable",
     "__version__",
+    "load_dfs",
+    "load_nda",
     "ls",
-    "read",
-    "read_as",
-    "read_n_rows",
     "show",
-    "write",
 ]
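Net effect of this hunk: the legacy convenience API is importable from the top level again. A minimal usage sketch, assuming an LH5 file `data.lh5` containing a table `geds/raw` with columns `energy` and `timestamp` (all file, group, and column names here are hypothetical):

```python
import lgdo

# inspect the file layout
print(lgdo.ls("data.lh5"))  # list top-level object names
lgdo.show("data.lh5")       # print the LH5 object tree

# legacy bulk loaders restored by this version range (they may emit
# DeprecationWarnings); both take a file or list of files, a list of
# column names, and an optional group prefix
df = lgdo.load_dfs("data.lh5", ["energy", "timestamp"], lh5_group="geds/raw/")
nda = lgdo.load_nda("data.lh5", ["energy"], lh5_group="geds/raw/")
```

The same functions are also re-exported from `lgdo.lh5`, as the `lgdo/lh5/__init__.py` diff below shows.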
lgdo/_version.py
CHANGED
lgdo/lh5/__init__.py
CHANGED
@@ -11,7 +11,7 @@ from ._serializers.write.array import DEFAULT_HDF5_SETTINGS
 from .core import read, read_as, write
 from .iterator import LH5Iterator
 from .store import LH5Store
-from .tools import ls, show
+from .tools import load_dfs, load_nda, ls, show
 from .utils import read_n_rows
 
 __all__ = [
@@ -19,6 +19,8 @@ __all__ = [
     "LH5Iterator",
     "LH5Store",
     "concat",
+    "load_dfs",
+    "load_nda",
     "ls",
     "read",
     "read_as",
lgdo/lh5/_serializers/read/composite.py
CHANGED
@@ -353,13 +353,15 @@ def _h5_read_table(
         table = Table(col_dict=col_dict, attrs=attrs)
 
         # set (write) loc to end of tree
-        table.
+        table.loc = n_rows_read
         return table, n_rows_read
 
     # We have read all fields into the object buffer. Run
     # checks: All columns should be the same size. So update
     # table's size as necessary, warn if any mismatches are found
     obj_buf.resize(do_warn=True)
+    # set (write) loc to end of tree
+    obj_buf.loc = obj_buf_start + n_rows_read
 
     # check attributes
     utils.check_obj_buf_attrs(obj_buf.attrs, attrs, fname, oname)
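A short sketch of the behavior these two hunks restore: after a read, the table's write cursor (`loc`) points just past the rows that were read, both for a freshly allocated table and for a caller-provided buffer. This assumes the tuple-returning `LH5Store.read` seen elsewhere in this diff; file and group names are hypothetical:

```python
from lgdo.lh5 import LH5Store

store = LH5Store()

# fresh read: a new Table is allocated, loc ends up at n_rows_read
tbl, n_rows = store.read("geds/raw", "data.lh5", n_rows=100)
assert tbl.loc == n_rows

# read into the existing buffer at an offset: loc ends up at
# obj_buf_start + n_rows_read
tbl, n2 = store.read("geds/raw", "data.lh5", obj_buf=tbl, obj_buf_start=50)
assert tbl.loc == 50 + n2
```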
lgdo/lh5/_serializers/write/composite.py
CHANGED
@@ -1,8 +1,8 @@
 from __future__ import annotations
 
 import logging
+import os
 from inspect import signature
-from pathlib import Path
 
 import h5py
 
@@ -53,7 +53,7 @@ def _h5_write_lgdo(
     # change any object in the file. So we use file:append for
     # write_object:overwrite.
     if not isinstance(lh5_file, h5py.File):
-        mode = "w" if wo_mode == "of" or not
+        mode = "w" if wo_mode == "of" or not os.path.exists(lh5_file) else "a"
         lh5_file = h5py.File(lh5_file, mode=mode, **file_kwargs)
 
     log.debug(
@@ -191,15 +191,26 @@ def _h5_write_struct(
 
     # In order to append a column, we need to update the
     # `struct/table{old_fields}` value in `group.attrs['datatype']` to include
-    # the new fields.
-    #
-    #
+    # the new fields. One way to do this is to override `obj.attrs["datatype"]`
+    # to include old and new fields. Then we can write the fields to the
+    # struct/table as normal.
     if wo_mode == "ac":
+        if name not in group:
+            msg = "Cannot append column to non-existing struct on disk"
+            raise LH5EncodeError(msg, lh5_file, group, name)
+
         old_group = utils.get_h5_group(name, group)
+        if "datatype" not in old_group.attrs:
+            msg = "Cannot append column to an existing non-LGDO object on disk"
+            raise LH5EncodeError(msg, lh5_file, group, name)
+
         lgdotype = datatype.datatype(old_group.attrs["datatype"])
         fields = datatype.get_struct_fields(old_group.attrs["datatype"])
         if lgdotype is not type(obj):
-            msg =
+            msg = (
+                "Trying to append columns to an object of different "
+                f"type {lgdotype.__name__}!={type(obj)}"
+            )
             raise LH5EncodeError(msg, lh5_file, group, name)
 
     # If the mode is `append_column`, make sure we aren't appending
@@ -230,14 +241,22 @@ def _h5_write_struct(
 
         obj.attrs["datatype"] = obj.datatype_name() + "{" + ",".join(fields) + "}"
 
+        # propagating wo_mode="ac" to nested LGDOs does not make any sense
+        wo_mode = "append"
+
+        # overwrite attributes of the existing struct
+        attrs_overwrite = True
+    else:
+        attrs_overwrite = wo_mode == "o"
+
     group = utils.get_h5_group(
         name,
         group,
         grp_attrs=obj.attrs,
-        overwrite=
+        overwrite=attrs_overwrite,
    )
     # If the mode is overwrite, then we need to peek into the file's
-    # table's existing fields.
+    # table's existing fields. If we are writing a new table to the
     # group that does not contain an old field, we should delete that
     # old field from the file
     if wo_mode == "o":
@@ -266,11 +285,9 @@ def _h5_write_struct(
         else:
             obj_fld = obj[field]
 
-        # Convert keys to string for dataset names
-        f = str(field)
         _h5_write_lgdo(
             obj_fld,
-            f,
+            str(field),
             lh5_file,
             group=group,
             start_row=start_row,
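A hedged sketch of the `wo_mode="ac"` (append-column) path guarded above: the target object must already exist on disk and be an LGDO of the same type, otherwise an `LH5EncodeError` is raised early. File and column names are made up:

```python
import numpy as np
from lgdo import Array, Table
from lgdo.lh5 import write

# write a table, then append a new column to it in a second pass
write(Table(col_dict={"a": Array(np.arange(10))}), "tbl", "scratch.lh5", wo_mode="of")
write(Table(col_dict={"b": Array(np.ones(10))}), "tbl", "scratch.lh5", wo_mode="ac")

# appending to a name that does not exist on disk, or to a plain HDF5
# group without a "datatype" attribute, now fails with LH5EncodeError
# instead of an obscure h5py error
```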
lgdo/lh5/concat.py
CHANGED
@@ -76,7 +76,7 @@ def _get_lgdos(file, obj_list):
             continue
 
         # read as little as possible
-        obj = store.read(current, h5f0, n_rows=1)
+        obj, _ = store.read(current, h5f0, n_rows=1)
         if isinstance(obj, (Table, Array, VectorOfVectors)):
             lgdos.append(current)
 
@@ -139,6 +139,12 @@ def _remove_nested_fields(lgdos: dict, obj_list: list):
             _inplace_table_filter(key, val, obj_list)
 
 
+def _slice(obj, n_rows):
+    ak_obj = obj.view_as("ak")[:n_rows]
+    obj_type = type(obj)
+    return obj_type(ak_obj)
+
+
 def lh5concat(
     lh5_files: list,
     output: str,
@@ -180,8 +186,8 @@ def lh5concat(
     # loop over lgdo objects
     for lgdo in lgdos:
         # iterate over the files
-        for lh5_obj in LH5Iterator(lh5_files, lgdo):
-            data = {lgdo: lh5_obj}
+        for lh5_obj, _, n_rows in LH5Iterator(lh5_files, lgdo):
+            data = {lgdo: _slice(lh5_obj, n_rows)}
 
             # remove the nested fields
             _remove_nested_fields(data, obj_list)
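The new `_slice` helper is needed because the iterator's buffer is reused between chunks: on the last chunk it can hold stale rows past `n_rows`, which must not be written to the output file. A standalone sketch of the same trimming logic (not the concat code path itself):

```python
from lgdo import VectorOfVectors

vov = VectorOfVectors([[1, 2], [3], [4, 5, 6]])

# keep only the first n_rows entries via an awkward view, as _slice does
n_rows = 2
trimmed = type(vov)(vov.view_as("ak")[:n_rows])
assert len(trimmed) == n_rows
```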
lgdo/lh5/core.py
CHANGED
@@ -4,7 +4,6 @@ import bisect
 import inspect
 import sys
 from collections.abc import Mapping, Sequence
-from contextlib import suppress
 from typing import Any
 
 import h5py
@@ -93,7 +92,8 @@ def read(
         will be set to ``True``, while the rest will default to ``False``.
     obj_buf
         Read directly into memory provided in `obj_buf`. Note: the buffer
-        will be
+        will be expanded to accommodate the data requested. To maintain the
+        buffer length, send in ``n_rows = len(obj_buf)``.
     obj_buf_start
         Start location in ``obj_buf`` for read. For concatenating data to
         array-like objects.
@@ -106,25 +106,25 @@ def read(
 
     Returns
     -------
-    object
-        the read-out object
+    (object, n_rows_read)
+        `object` is the read-out object, `n_rows_read` is the number of rows
+        successfully read out. Essential for arrays when the amount of data
+        is smaller than the object buffer. For scalars and structs
+        `n_rows_read` will be ``1``. For tables it is redundant with
+        ``table.loc``. If `obj_buf` is ``None``, only `object` is returned.
     """
     if isinstance(lh5_file, h5py.File):
         lh5_obj = lh5_file[name]
     elif isinstance(lh5_file, str):
         lh5_file = h5py.File(lh5_file, mode="r", locking=locking)
-        try:
-            lh5_obj = lh5_file[name]
-        except KeyError as ke:
-            err = f"Object {name} not found in file {lh5_file.filename}"
-            raise KeyError(err) from ke
+        lh5_obj = lh5_file[name]
     else:
-
-
-
-
+        lh5_files = list(lh5_file)
+
+        n_rows_read = 0
+        obj_buf_is_new = False
 
-        for i, h5f in enumerate(
+        for i, h5f in enumerate(lh5_files):
             if (
                 isinstance(idx, (list, tuple))
                 and len(idx) > 0
@@ -146,26 +146,33 @@ def read(
                 idx = np.array(idx[0])[n_rows_to_read_i:] - n_rows_i
             else:
                 idx_i = None
+                n_rows_i = n_rows - n_rows_read
 
-
-            n_rows_i = n_rows - (obj_buf_start_i - obj_buf_start)
-
-            obj_buf = read(
+            obj_ret = read(
                 name,
                 h5f,
-                start_row
+                start_row,
                 n_rows_i,
                 idx_i,
                 use_h5idx,
                 field_mask,
                 obj_buf,
-
+                obj_buf_start,
                 decompress,
             )
+            if isinstance(obj_ret, tuple):
+                obj_buf, n_rows_read_i = obj_ret
+                obj_buf_is_new = True
+            else:
+                obj_buf = obj_ret
+                n_rows_read_i = len(obj_buf)
 
-
-
-
+            n_rows_read += n_rows_read_i
+            if n_rows_read >= n_rows or obj_buf is None:
+                return obj_buf, n_rows_read
+            start_row = 0
+            obj_buf_start += n_rows_read_i
+        return obj_buf if obj_buf_is_new else (obj_buf, n_rows_read)
 
     if isinstance(idx, (list, tuple)) and len(idx) > 0 and not np.isscalar(idx[0]):
         idx = idx[0]
@@ -185,10 +192,8 @@ def read(
         obj_buf_start=obj_buf_start,
         decompress=decompress,
     )
-    with suppress(AttributeError):
-        obj.resize(obj_buf_start + n_rows_read)
 
-    return obj
+    return obj if obj_buf is None else (obj, n_rows_read)
 
 
 def write(
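In short, this restores the older return convention of `lgdo.lh5.read`: a bare LGDO when no buffer is passed, an `(obj, n_rows_read)` pair when one is. A sketch with hypothetical file and object names:

```python
from lgdo import lh5

# no obj_buf: just the object is returned
obj = lh5.read("geds/raw", "data.lh5", n_rows=1000)

# with obj_buf: (buffer, rows actually read); n_rows_read can be smaller
# than requested near the end of the file
obj, n_rows_read = lh5.read("geds/raw", "data.lh5", obj_buf=obj, start_row=1000)
```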
lgdo/lh5/iterator.py
CHANGED
@@ -24,8 +24,7 @@ class LH5Iterator(typing.Iterator):
 
     This can be used as an iterator:
 
-
-    >>> for lh5_obj in LH5Iterator(...):
+    >>> for lh5_obj, i_entry, n_rows in LH5Iterator(...):
     >>>     # do the thing!
 
     This is intended for if you are reading a large quantity of data. This
@@ -43,8 +42,6 @@ class LH5Iterator(typing.Iterator):
     In addition to accessing requested data via ``lh5_obj``, several
     properties exist to tell you where that data came from:
 
-    - lh5_it.current_i_entry: get the index within the entry list of the
-      first entry that is currently read
     - lh5_it.current_local_entries: get the entry numbers relative to the
       file the data came from
     - lh5_it.current_global_entries: get the entry number relative to the
@@ -52,9 +49,9 @@ class LH5Iterator(typing.Iterator):
     - lh5_it.current_files: get the file name corresponding to each entry
     - lh5_it.current_groups: get the group name corresponding to each entry
 
-    This class can also be used for random access:
+    This class can also be used either for random access:
 
-    >>> lh5_obj = lh5_it.read(i_entry)
+    >>> lh5_obj, n_rows = lh5_it.read(i_entry)
 
     to read the block of entries starting at i_entry. In case of multiple files
     or the use of an event selection, i_entry refers to a global event index
@@ -68,8 +65,6 @@ class LH5Iterator(typing.Iterator):
         base_path: str = "",
         entry_list: list[int] | list[list[int]] | None = None,
         entry_mask: list[bool] | list[list[bool]] | None = None,
-        i_start: int = 0,
-        n_entries: int | None = None,
         field_mask: dict[str, bool] | list[str] | tuple[str] | None = None,
         buffer_len: int = "100*MB",
         file_cache: int = 10,
@@ -94,10 +89,6 @@ class LH5Iterator(typing.Iterator):
         entry_mask
             mask of entries to read. If a list of arrays is provided, expect
             one for each file. Ignore if a selection list is provided.
-        i_start
-            index of first entry to start at when iterating
-        n_entries
-            number of entries to read before terminating iteration
         field_mask
             mask of which fields to read. See :meth:`LH5Store.read` for
             more details.
@@ -192,8 +183,7 @@ class LH5Iterator(typing.Iterator):
             msg = f"can't open any files from {lh5_files}"
             raise RuntimeError(msg)
 
-        self.
-        self.n_entries = n_entries
+        self.n_rows = 0
         self.current_i_entry = 0
         self.next_i_entry = 0
 
@@ -327,21 +317,14 @@ class LH5Iterator(typing.Iterator):
         )
         return self.global_entry_list
 
-    def read(self, i_entry: int
-        "Read the nextlocal chunk of events, starting at
-
-
-        if n_entries is None:
-            n_entries = self.buffer_len
-        elif n_entries == 0:
-            return self.lh5_buffer
-        elif n_entries > self.buffer_len:
-            msg = "n_entries cannot be larger than buffer_len"
-            raise ValueError(msg)
+    def read(self, i_entry: int) -> tuple[LGDO, int]:
+        """Read the next local chunk of events, starting at i_entry. Return the
+        LH5 buffer and number of rows read."""
+        self.n_rows = 0
+        i_file = np.searchsorted(self.entry_map, i_entry, "right")
 
         # if file hasn't been opened yet, search through files
         # sequentially until we find the right one
-        i_file = np.searchsorted(self.entry_map, i_entry, "right")
         if i_file < len(self.lh5_files) and self.entry_map[i_file] == np.iinfo("q").max:
             while i_file < len(self.lh5_files) and i_entry >= self._get_file_cumentries(
                 i_file
@@ -349,10 +332,10 @@ class LH5Iterator(typing.Iterator):
                 i_file += 1
 
         if i_file == len(self.lh5_files):
-            return self.lh5_buffer
+            return (self.lh5_buffer, self.n_rows)
         local_i_entry = i_entry - self._get_file_cumentries(i_file - 1)
 
-        while
+        while self.n_rows < self.buffer_len and i_file < len(self.file_map):
             # Loop through files
             local_idx = self.get_file_entrylist(i_file)
             if local_idx is not None and len(local_idx) == 0:
@@ -361,17 +344,18 @@ class LH5Iterator(typing.Iterator):
                 continue
 
             i_local = local_i_entry if local_idx is None else local_idx[local_i_entry]
-            self.lh5_buffer = self.lh5_st.read(
+            self.lh5_buffer, n_rows = self.lh5_st.read(
                 self.groups[i_file],
                 self.lh5_files[i_file],
                 start_row=i_local,
-                n_rows=
+                n_rows=self.buffer_len - self.n_rows,
                 idx=local_idx,
                 field_mask=self.field_mask,
                 obj_buf=self.lh5_buffer,
-                obj_buf_start=
+                obj_buf_start=self.n_rows,
             )
 
+            self.n_rows += n_rows
             i_file += 1
             local_i_entry = 0
 
@@ -380,7 +364,7 @@ class LH5Iterator(typing.Iterator):
         if self.friend is not None:
             self.friend.read(i_entry)
 
-        return self.lh5_buffer
+        return (self.lh5_buffer, self.n_rows)
 
     def reset_field_mask(self, mask):
         """Replaces the field mask of this iterator and any friends with mask"""
@@ -391,7 +375,7 @@ class LH5Iterator(typing.Iterator):
     @property
     def current_local_entries(self) -> NDArray[int]:
         """Return list of local file entries in buffer"""
-        cur_entries = np.zeros(
+        cur_entries = np.zeros(self.n_rows, dtype="int32")
         i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
         file_start = self._get_file_cumentries(i_file - 1)
         i_local = self.current_i_entry - file_start
@@ -418,7 +402,7 @@ class LH5Iterator(typing.Iterator):
     @property
     def current_global_entries(self) -> NDArray[int]:
         """Return list of local file entries in buffer"""
-        cur_entries = np.zeros(
+        cur_entries = np.zeros(self.n_rows, dtype="int32")
         i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
         file_start = self._get_file_cumentries(i_file - 1)
         i_local = self.current_i_entry - file_start
@@ -449,7 +433,7 @@ class LH5Iterator(typing.Iterator):
     @property
     def current_files(self) -> NDArray[str]:
         """Return list of file names for entries in buffer"""
-        cur_files = np.zeros(
+        cur_files = np.zeros(self.n_rows, dtype=object)
         i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
         file_start = self._get_file_cumentries(i_file - 1)
         i_local = self.current_i_entry - file_start
@@ -471,7 +455,7 @@ class LH5Iterator(typing.Iterator):
     @property
     def current_groups(self) -> NDArray[str]:
         """Return list of group names for entries in buffer"""
-        cur_groups = np.zeros(
+        cur_groups = np.zeros(self.n_rows, dtype=object)
         i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
         file_start = self._get_file_cumentries(i_file - 1)
         i_local = self.current_i_entry - file_start
@@ -501,19 +485,14 @@ class LH5Iterator(typing.Iterator):
     def __iter__(self) -> typing.Iterator:
         """Loop through entries in blocks of size buffer_len."""
         self.current_i_entry = 0
-        self.next_i_entry =
+        self.next_i_entry = 0
         return self
 
     def __next__(self) -> tuple[LGDO, int, int]:
-        """Read next buffer_len entries and return lh5_table
-
-
-
-
-        )
-
-        buf = self.read(self.next_i_entry, n_entries)
-        if len(buf) == 0:
+        """Read next buffer_len entries and return lh5_table, iterator entry
+        and n_rows read."""
+        buf, n_rows = self.read(self.next_i_entry)
+        self.next_i_entry = self.current_i_entry + n_rows
+        if n_rows == 0:
             raise StopIteration
-
-        return buf
+        return (buf, self.current_i_entry, n_rows)