legend-pydataobj 1.11.12__py3-none-any.whl → 1.12.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- {legend_pydataobj-1.11.12.dist-info → legend_pydataobj-1.12.0.dist-info}/METADATA +1 -1
- {legend_pydataobj-1.11.12.dist-info → legend_pydataobj-1.12.0.dist-info}/RECORD +27 -27
- {legend_pydataobj-1.11.12.dist-info → legend_pydataobj-1.12.0.dist-info}/WHEEL +1 -1
- lgdo/__init__.py +5 -4
- lgdo/_version.py +2 -2
- lgdo/lh5/__init__.py +3 -5
- lgdo/lh5/_serializers/read/composite.py +1 -3
- lgdo/lh5/_serializers/write/array.py +2 -3
- lgdo/lh5/_serializers/write/composite.py +2 -2
- lgdo/lh5/concat.py +3 -9
- lgdo/lh5/core.py +33 -32
- lgdo/lh5/iterator.py +48 -27
- lgdo/lh5/settings.py +34 -0
- lgdo/lh5/store.py +22 -75
- lgdo/lh5/tools.py +0 -111
- lgdo/lh5/utils.py +6 -4
- lgdo/types/array.py +84 -15
- lgdo/types/encoded.py +25 -20
- lgdo/types/histogram.py +1 -1
- lgdo/types/lgdo.py +50 -0
- lgdo/types/table.py +50 -28
- lgdo/types/vectorofvectors.py +132 -94
- lgdo/types/vovutils.py +14 -4
- lgdo/types/waveformtable.py +19 -21
- lgdo/lh5_store.py +0 -284
- {legend_pydataobj-1.11.12.dist-info → legend_pydataobj-1.12.0.dist-info}/entry_points.txt +0 -0
- {legend_pydataobj-1.11.12.dist-info → legend_pydataobj-1.12.0.dist-info}/licenses/LICENSE +0 -0
- {legend_pydataobj-1.11.12.dist-info → legend_pydataobj-1.12.0.dist-info}/top_level.txt +0 -0
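
The headline user-facing changes: lh5.read() now returns just the LGDO object instead of an (object, n_rows_read) tuple, LH5Iterator yields its buffer directly and gains i_start/n_entries arguments, the HDF5 write defaults move to a new lgdo.lh5.settings module, and the legacy lgdo.lh5_store module is removed. A minimal sketch of the new-style API (the file and group names are placeholders, not from the diff):

    from lgdo import lh5

    # read() now returns only the object; its length is the number of rows read
    tbl = lh5.read("geds/data", "file.lh5")

    # the iterator yields the filled buffer itself
    for chunk in lh5.LH5Iterator("file.lh5", "geds/data"):
        print(len(chunk))
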
{legend_pydataobj-1.11.12.dist-info → legend_pydataobj-1.12.0.dist-info}/RECORD
CHANGED
@@ -1,9 +1,8 @@
-legend_pydataobj-1.
-lgdo/__init__.py,sha256=
-lgdo/_version.py,sha256=
+legend_pydataobj-1.12.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+lgdo/__init__.py,sha256=fkRv79kdtBasw31gPVK9SdLQ2vEEajTV2t3UPDvFg9o,3206
+lgdo/_version.py,sha256=X7AXkrxMLYa0fUCdwZA2oOfiFkQJiuenTXzRghkc4eU,513
 lgdo/cli.py,sha256=s_EWTBWW76l7zWb6gaTSTjiT-0RzzcYEmjeFEQCVxfk,4647
 lgdo/lgdo_utils.py,sha256=6a2YWEwpyEMXlAyTHZMO01aqxy6SxJzPZkGNWKNWuS0,2567
-lgdo/lh5_store.py,sha256=5BzbJA9sLcqjp8bJDc2olwOiw0VS6rmfg3cfh1kQkRY,8512
 lgdo/logging.py,sha256=82wIOj7l7xr3WYyeHdpSXbbjzHJsy-uRyKYUYx2vMfQ,1003
 lgdo/units.py,sha256=VQYME86_ev9S7Fq8RyCOQNqYr29MphTTYemmEouZafk,161
 lgdo/utils.py,sha256=WRTmXnaQ-h2hVxwJ27qiOigdsD3DHcaDrdDjvupCuZU,3940
@@ -13,44 +12,45 @@ lgdo/compression/generic.py,sha256=tF3UhLJbUDcovLxpIzgQRxFSjZ5Fz3uDRy9kI4mFntQ,2
 lgdo/compression/radware.py,sha256=GcNTtjuyL7VBBqziUBmSqNXuhqy1bJJgvcyvyumPtrc,23839
 lgdo/compression/utils.py,sha256=W2RkBrxPpXlat84dnU9Ad7d_tTws0irtGl7O1dNWjnk,1140
 lgdo/compression/varlen.py,sha256=6ZZUItyoOfygDdE0DyoISeFZfqdbH6xl7T0eclfarzg,15127
-lgdo/lh5/__init__.py,sha256=
-lgdo/lh5/concat.py,sha256=
-lgdo/lh5/core.py,sha256=
+lgdo/lh5/__init__.py,sha256=smHTawINIiogHNfYJq3aPvtxleTnBMdPADRCdc1wea8,748
+lgdo/lh5/concat.py,sha256=BZCgK7TWPKK8fMmha8K83d3bC31FVO1b5LOW7x-Ru1s,6186
+lgdo/lh5/core.py,sha256=U0ZZk6EmojRRYFBEo_bMy7jZ3SKBU41MIsSulyFxZIU,13752
 lgdo/lh5/datatype.py,sha256=ry3twFaosuBoskiTKqtBYRMk9PQAf403593xKaItfog,1827
 lgdo/lh5/exceptions.py,sha256=3kj8avXl4eBGvebl3LG12gJEmw91W0T8PYR0AfvUAyM,1211
-lgdo/lh5/iterator.py,sha256=
-lgdo/lh5/
-lgdo/lh5/
-lgdo/lh5/
+lgdo/lh5/iterator.py,sha256=1ob9B7Bf3ioGCtZkUZoL6ibTxAwLf4ld8_33ghVVEa4,20498
+lgdo/lh5/settings.py,sha256=cmPd6ZvneAF5sFMA1qf-9g_YSSygJcQSRmZDp1_sBEU,1001
+lgdo/lh5/store.py,sha256=qkBm3gPbr1R2UlQpUuDR5sGRMzpYJBWFL8fDIry6tmQ,8474
+lgdo/lh5/tools.py,sha256=drtJWHY82wCFuFr6LVVnm2AQgs_wZuFmAvyOB4tcOHs,6431
+lgdo/lh5/utils.py,sha256=f2H7H1D-RfDN3g_YrVDQEPaHevn5yDJFA-uznK9cgx8,6336
 lgdo/lh5/_serializers/__init__.py,sha256=eZzxMp1SeZWG0PkEXUiCz3XyprQ8EmelHUmJogC8xYE,1263
 lgdo/lh5/_serializers/read/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lgdo/lh5/_serializers/read/array.py,sha256=uWfMCihfAmW2DE2ewip2qCK_kvQC_mb2zvOv26uzijc,1000
-lgdo/lh5/_serializers/read/composite.py,sha256=
+lgdo/lh5/_serializers/read/composite.py,sha256=UvkZHEhf0V7SFLxzF52eyP68hU0guGOLqosrfmIfeys,11729
 lgdo/lh5/_serializers/read/encoded.py,sha256=Q98c08d8LkZq2AlY4rThYECVaEqwbv4T2Urn7TGnsyE,4130
 lgdo/lh5/_serializers/read/ndarray.py,sha256=lFCXD6bSzmMOH7cVmvRYXakkfMCI8EoqTPNONRJ1F0s,3690
 lgdo/lh5/_serializers/read/scalar.py,sha256=kwhWm1T91pXf86CqtUUD8_qheSR92gXZrQVtssV5YCg,922
 lgdo/lh5/_serializers/read/utils.py,sha256=YfSqPO-83A1XvhhuULxQ0Qz2A5ODa3sb7ApNxQVJXd0,7581
 lgdo/lh5/_serializers/read/vector_of_vectors.py,sha256=765P8mElGArAaEPkHTAUXFQ47t1_3-3BQAete0LckBQ,7207
 lgdo/lh5/_serializers/write/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lgdo/lh5/_serializers/write/array.py,sha256=
-lgdo/lh5/_serializers/write/composite.py,sha256=
+lgdo/lh5/_serializers/write/array.py,sha256=gAB1EjPa9YojPqohVwY_VPeuY7_bLN-lttCmfgty-qk,3175
+lgdo/lh5/_serializers/write/composite.py,sha256=eX5an6YZ5I7zf1z90mfzKYYJQoS-ux10rjDrUdevW6Y,10025
 lgdo/lh5/_serializers/write/scalar.py,sha256=JPt_fcdTKOSFp5hfJdcKIfK4hxhcD8vhOlvDF-7btQ8,763
 lgdo/lh5/_serializers/write/vector_of_vectors.py,sha256=puGQX9XF5P_5DVbm_Cc6TvPrsDywgBLSYtkqFNltbB4,3493
 lgdo/types/__init__.py,sha256=DNfOErPiAZg-7Gygkp6ZKAi20Yrm1mfderZHvKo1Y4s,821
-lgdo/types/array.py,sha256=
+lgdo/types/array.py,sha256=vxViJScqKw4zGUrrIOuuU_9Y0oTfOkEEhs0TOyUYjwI,9284
 lgdo/types/arrayofequalsizedarrays.py,sha256=DOGJiTmc1QCdm7vLbE6uIRXoMPtt8uuCfmwQawgWf5s,4949
-lgdo/types/encoded.py,sha256=
+lgdo/types/encoded.py,sha256=_e8u_BPfpjJbLnEdyTo9QG3kbNsGj0BN4gjdj3L1ndw,15640
 lgdo/types/fixedsizearray.py,sha256=7RjUwTz1bW0pcrdy27JlfrXPAuOU89Kj7pOuSUCojK8,1527
-lgdo/types/histogram.py,sha256=
-lgdo/types/lgdo.py,sha256=
+lgdo/types/histogram.py,sha256=Jz1lLH56BfYnmcUhxUHK1h2wLDQ0Abgyd-6LznU-3-k,19979
+lgdo/types/lgdo.py,sha256=21YNtJCHnSO3M60rjsAdbMO5crDjL_0BtuFpudZ2xvU,4500
 lgdo/types/scalar.py,sha256=c5Es2vyDqyWTPV6mujzfIzMpC1jNWkEIcvYyWQUxH3Q,1933
 lgdo/types/struct.py,sha256=m3pYfGfKptV8ti3wb4n1nsPKMvhjdWCFoRdR5YooZBM,6353
-lgdo/types/table.py,sha256=
-lgdo/types/vectorofvectors.py,sha256=
-lgdo/types/vovutils.py,sha256=
-lgdo/types/waveformtable.py,sha256=
-legend_pydataobj-1.
-legend_pydataobj-1.
-legend_pydataobj-1.
-legend_pydataobj-1.
-legend_pydataobj-1.
+lgdo/types/table.py,sha256=hvOwhFkm-_CkNhGmD8SJoeepZcwFY6ItYOS76LztKtA,20158
+lgdo/types/vectorofvectors.py,sha256=GbAKV_ehXN4XdWSwnmKS_ErCiudRetcH_3wo7iDrVjw,26854
+lgdo/types/vovutils.py,sha256=LW3ZcwECxVYxxcFadAtY3nnK-9-rk8Xbg_m8hY30lo4,10708
+lgdo/types/waveformtable.py,sha256=9S_NMg894NZTGt2pLuskwH4-zQ5EbLnzWI6FVui6fXE,9827
+legend_pydataobj-1.12.0.dist-info/METADATA,sha256=76785CT-1QRlVf6WOFnbnRWUiC6zSUnMxFR2km15kQ4,44443
+legend_pydataobj-1.12.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+legend_pydataobj-1.12.0.dist-info/entry_points.txt,sha256=0KWfnwbuwhNn0vPUqARukjp04Ca6lzfZBSirouRmk7I,76
+legend_pydataobj-1.12.0.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
+legend_pydataobj-1.12.0.dist-info/RECORD,,
lgdo/__init__.py
CHANGED
@@ -45,7 +45,7 @@ browsed easily in python like any `HDF5 <https://www.hdfgroup.org>`_ file using
 from __future__ import annotations
 
 from ._version import version as __version__
-from .
+from .lh5 import LH5Iterator, ls, read, read_as, read_n_rows, show, write
 from .types import (
     LGDO,
     Array,
@@ -69,7 +69,6 @@ __all__ = [
     "FixedSizeArray",
     "Histogram",
     "LH5Iterator",
-    "LH5Store",
     "Scalar",
     "Struct",
     "Table",
@@ -77,8 +76,10 @@ __all__ = [
     "VectorOfVectors",
     "WaveformTable",
     "__version__",
-    "load_dfs",
-    "load_nda",
     "ls",
+    "read",
+    "read_as",
+    "read_n_rows",
     "show",
+    "write",
 ]
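
The top-level namespace now re-exports the functional lh5 API (read, read_as, read_n_rows, write), while LH5Store, load_dfs and load_nda disappear from lgdo.__all__ (LH5Store remains importable from lgdo.lh5). A hedged migration sketch with placeholder names:

    # 1.11.x style, no longer exported at the top level:
    #     from lgdo import LH5Store
    #     obj, n_rows = LH5Store().read("geds/data", "file.lh5")

    # 1.12.0 style:
    from lgdo import read, write

    obj = read("geds/data", "file.lh5")
    write(obj, "data", "out.lh5", group="geds")
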
lgdo/_version.py
CHANGED
lgdo/lh5/__init__.py
CHANGED
@@ -7,24 +7,22 @@ browsed easily in python like any `HDF5 <https://www.hdfgroup.org>`_ file using
 
 from __future__ import annotations
 
-from ._serializers.write.array import DEFAULT_HDF5_SETTINGS
 from .core import read, read_as, write
 from .iterator import LH5Iterator
 from .store import LH5Store
-from .tools import
+from .tools import ls, show
 from .utils import read_n_rows
 
 __all__ = [
-    "DEFAULT_HDF5_SETTINGS",
     "LH5Iterator",
     "LH5Store",
     "concat",
-    "
-    "load_nda",
+    "default_hdf5_settings",
     "ls",
     "read",
     "read_as",
     "read_n_rows",
+    "reset_default_hdf5_settings",
     "show",
     "write",
 ]
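
DEFAULT_HDF5_SETTINGS is no longer re-exported here (it now lives in the new lgdo.lh5.settings module, shown at the end of this diff) and the load_* helpers are gone; default_hdf5_settings and reset_default_hdf5_settings are exported instead. A sketch of tuning write defaults under the new layout, using only names visible in this diff:

    from lgdo.lh5 import settings

    # the defaults moved out of the write serializer into their own module
    settings.DEFAULT_HDF5_SETTINGS["compression"] = "lzf"

    # rebuild the packaged defaults (shuffle + gzip)
    settings.DEFAULT_HDF5_SETTINGS = settings.default_hdf5_settings()
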
lgdo/lh5/_serializers/read/composite.py
CHANGED
@@ -353,15 +353,13 @@ def _h5_read_table(
         table = Table(col_dict=col_dict, attrs=attrs)
 
         # set (write) loc to end of tree
-        table.
+        table.resize(do_warn=True)
         return table, n_rows_read
 
     # We have read all fields into the object buffer. Run
     # checks: All columns should be the same size. So update
     # table's size as necessary, warn if any mismatches are found
     obj_buf.resize(do_warn=True)
-    # set (write) loc to end of tree
-    obj_buf.loc = obj_buf_start + n_rows_read
 
     # check attributes
     utils.check_obj_buf_attrs(obj_buf.attrs, attrs, fname, oname)
lgdo/lh5/_serializers/write/array.py
CHANGED
@@ -6,12 +6,11 @@ import h5py
 import numpy as np
 
 from .... import types
+from ... import settings
 from ...exceptions import LH5EncodeError
 
 log = logging.getLogger(__name__)
 
-DEFAULT_HDF5_SETTINGS: dict[str, ...] = {"shuffle": True, "compression": "gzip"}
-
 
 def _h5_write_array(
     obj,
@@ -49,7 +48,7 @@ def _h5_write_array(
         del group[name]
 
     # set default compression options
-    for k, v in DEFAULT_HDF5_SETTINGS.items():
+    for k, v in settings.DEFAULT_HDF5_SETTINGS.items():
         h5py_kwargs.setdefault(k, v)
 
     # compress using the 'compression' LGDO attribute, if available
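
Since the serializer fills h5py_kwargs via setdefault, explicit per-call keyword arguments still override the global defaults; looking the dictionary up through the settings module (rather than importing a local constant) is what makes runtime rebinding of DEFAULT_HDF5_SETTINGS visible here. A standalone illustration of the merge rule:

    DEFAULT_HDF5_SETTINGS = {"shuffle": True, "compression": "gzip"}

    # mimics the loop in _h5_write_array: only missing keys are filled in
    h5py_kwargs = {"compression": "lzf"}
    for k, v in DEFAULT_HDF5_SETTINGS.items():
        h5py_kwargs.setdefault(k, v)

    print(h5py_kwargs)  # {'compression': 'lzf', 'shuffle': True}
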
lgdo/lh5/_serializers/write/composite.py
CHANGED
@@ -1,8 +1,8 @@
 from __future__ import annotations
 
 import logging
-import os
 from inspect import signature
+from pathlib import Path
 
 import h5py
 
@@ -53,7 +53,7 @@ def _h5_write_lgdo(
     # change any object in the file. So we use file:append for
     # write_object:overwrite.
     if not isinstance(lh5_file, h5py.File):
-        mode = "w" if wo_mode == "of" or not
+        mode = "w" if wo_mode == "of" or not Path(lh5_file).exists() else "a"
         lh5_file = h5py.File(lh5_file, mode=mode, **file_kwargs)
 
     log.debug(
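
The os-based existence check becomes pathlib, with the mode logic unchanged: wo_mode "of" or a not-yet-existing file opens with "w" (truncate), anything else appends. Reproduced in isolation:

    from pathlib import Path

    def h5_file_mode(lh5_file: str, wo_mode: str) -> str:
        # mirrors _h5_write_lgdo: truncate for "of" or brand-new files, else append
        return "w" if wo_mode == "of" or not Path(lh5_file).exists() else "a"

    print(h5_file_mode("fresh.lh5", "append"))  # "w" unless fresh.lh5 already exists
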
lgdo/lh5/concat.py
CHANGED
@@ -76,7 +76,7 @@ def _get_lgdos(file, obj_list):
             continue
 
         # read as little as possible
-        obj
+        obj = store.read(current, h5f0, n_rows=1)
         if isinstance(obj, (Table, Array, VectorOfVectors)):
             lgdos.append(current)
 
@@ -139,12 +139,6 @@ def _remove_nested_fields(lgdos: dict, obj_list: list):
         _inplace_table_filter(key, val, obj_list)
 
 
-def _slice(obj, n_rows):
-    ak_obj = obj.view_as("ak")[:n_rows]
-    obj_type = type(obj)
-    return obj_type(ak_obj)
-
-
 def lh5concat(
     lh5_files: list,
     output: str,
@@ -186,8 +180,8 @@ def lh5concat(
     # loop over lgdo objects
     for lgdo in lgdos:
         # iterate over the files
-        for lh5_obj
-        data = {lgdo:
+        for lh5_obj in LH5Iterator(lh5_files, lgdo):
+            data = {lgdo: lh5_obj}
 
             # remove the nested fields
             _remove_nested_fields(data, obj_list)
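
lh5concat now streams each object through LH5Iterator instead of reading row counts and slicing by hand, which is why the private _slice helper is deleted. The same chunked pattern works in user code; a sketch with hypothetical file names:

    from lgdo.lh5 import LH5Iterator

    files = ["run1.lh5", "run2.lh5"]  # placeholders

    # each iteration yields the next chunk of "geds/data",
    # transparently crossing file boundaries
    for lh5_obj in LH5Iterator(files, "geds/data"):
        data = {"geds/data": lh5_obj}
        # ... filter and write the chunk
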
lgdo/lh5/core.py
CHANGED
@@ -4,6 +4,8 @@ import bisect
 import inspect
 import sys
 from collections.abc import Mapping, Sequence
+from contextlib import suppress
+from pathlib import Path
 from typing import Any
 
 import h5py
@@ -92,8 +94,7 @@ def read(
         will be set to ``True``, while the rest will default to ``False``.
     obj_buf
         Read directly into memory provided in `obj_buf`. Note: the buffer
-        will be
-        buffer length, send in ``n_rows = len(obj_buf)``.
+        will be resized to accommodate the data retrieved.
     obj_buf_start
         Start location in ``obj_buf`` for read. For concatenating data to
         array-like objects.
@@ -106,25 +107,25 @@ def read(
 
     Returns
     -------
-
-
-        successfully read out. Essential for arrays when the amount of data
-        is smaller than the object buffer. For scalars and structs
-        `n_rows_read` will be``1``. For tables it is redundant with
-        ``table.loc``. If `obj_buf` is ``None``, only `object` is returned.
+    object
+        the read-out object
     """
     if isinstance(lh5_file, h5py.File):
         lh5_obj = lh5_file[name]
     elif isinstance(lh5_file, str):
         lh5_file = h5py.File(lh5_file, mode="r", locking=locking)
-
+        try:
+            lh5_obj = lh5_file[name]
+        except KeyError as ke:
+            err = f"Object {name} not found in file {lh5_file.filename}"
+            raise KeyError(err) from ke
     else:
-
-
-
-
+        if obj_buf is not None:
+            obj_buf.resize(obj_buf_start)
+        else:
+            obj_buf_start = 0
 
-        for i, h5f in enumerate(
+        for i, h5f in enumerate(lh5_file):
             if (
                 isinstance(idx, (list, tuple))
                 and len(idx) > 0
@@ -146,33 +147,26 @@ def read(
                 idx = np.array(idx[0])[n_rows_to_read_i:] - n_rows_i
             else:
                 idx_i = None
-                n_rows_i = n_rows - n_rows_read
 
-
+            obj_buf_start_i = len(obj_buf) if obj_buf else 0
+            n_rows_i = n_rows - (obj_buf_start_i - obj_buf_start)
+
+            obj_buf = read(
                 name,
                 h5f,
-                start_row,
+                start_row if i == 0 else 0,
                 n_rows_i,
                 idx_i,
                 use_h5idx,
                 field_mask,
                 obj_buf,
-
+                obj_buf_start_i,
                 decompress,
             )
-            if isinstance(obj_ret, tuple):
-                obj_buf, n_rows_read_i = obj_ret
-                obj_buf_is_new = True
-            else:
-                obj_buf = obj_ret
-                n_rows_read_i = len(obj_buf)
 
-
-
-
-            start_row = 0
-            obj_buf_start += n_rows_read_i
-        return obj_buf if obj_buf_is_new else (obj_buf, n_rows_read)
+            if obj_buf is None or (len(obj_buf) - obj_buf_start) >= n_rows:
+                return obj_buf
+        return obj_buf
 
     if isinstance(idx, (list, tuple)) and len(idx) > 0 and not np.isscalar(idx[0]):
         idx = idx[0]
@@ -192,8 +186,10 @@ def read(
         obj_buf_start=obj_buf_start,
         decompress=decompress,
     )
+    with suppress(AttributeError):
+        obj.resize(obj_buf_start + n_rows_read)
 
-    return obj
+    return obj
 
 
 def write(
@@ -295,7 +291,12 @@ def write(
     datasets. **Note: `compression` Ignored if compression is specified
     as an `obj` attribute.**
     """
-
+
+    if (
+        isinstance(lh5_file, str)
+        and not Path(lh5_file).is_file()
+        and wo_mode in ("w", "write_safe", "of", "overwrite_file")
+    ):
         h5py_kwargs.update(
             {
                 "fs_strategy": "page",
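
The visible contract of read() changes here: the (object, n_rows_read) tuple is gone, the returned LGDO is resized so len() already reflects what was read, and a missing name now raises a KeyError naming the file. Adapting caller code is mechanical; a hedged sketch with placeholder names:

    from lgdo import lh5

    # 1.11.x: obj, n_rows_read = lh5.read("geds/data", "file.lh5", obj_buf=buf)
    # 1.12.0: the object itself carries its length
    obj = lh5.read("geds/data", "file.lh5")
    n_rows_read = len(obj)
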
lgdo/lh5/iterator.py
CHANGED
@@ -24,7 +24,8 @@ class LH5Iterator(typing.Iterator):
 
     This can be used as an iterator:
 
-
+
+    >>> for lh5_obj in LH5Iterator(...):
     >>>     # do the thing!
 
     This is intended for if you are reading a large quantity of data. This
@@ -42,6 +43,8 @@ class LH5Iterator(typing.Iterator):
     In addition to accessing requested data via ``lh5_obj``, several
     properties exist to tell you where that data came from:
 
+    - lh5_it.current_i_entry: get the index within the entry list of the
+      first entry that is currently read
     - lh5_it.current_local_entries: get the entry numbers relative to the
       file the data came from
     - lh5_it.current_global_entries: get the entry number relative to the
@@ -49,9 +52,9 @@ class LH5Iterator(typing.Iterator):
     - lh5_it.current_files: get the file name corresponding to each entry
     - lh5_it.current_groups: get the group name corresponding to each entry
 
-    This class can also be used
+    This class can also be used for random access:
 
-    >>> lh5_obj
+    >>> lh5_obj = lh5_it.read(i_entry)
 
     to read the block of entries starting at i_entry. In case of multiple files
     or the use of an event selection, i_entry refers to a global event index
@@ -65,6 +68,8 @@ class LH5Iterator(typing.Iterator):
         base_path: str = "",
         entry_list: list[int] | list[list[int]] | None = None,
         entry_mask: list[bool] | list[list[bool]] | None = None,
+        i_start: int = 0,
+        n_entries: int | None = None,
         field_mask: dict[str, bool] | list[str] | tuple[str] | None = None,
         buffer_len: int = "100*MB",
         file_cache: int = 10,
@@ -89,6 +94,10 @@ class LH5Iterator(typing.Iterator):
         entry_mask
             mask of entries to read. If a list of arrays is provided, expect
             one for each file. Ignore if a selection list is provided.
+        i_start
+            index of first entry to start at when iterating
+        n_entries
+            number of entries to read before terminating iteration
         field_mask
             mask of which fields to read. See :meth:`LH5Store.read` for
             more details.
@@ -183,7 +192,8 @@ class LH5Iterator(typing.Iterator):
             msg = f"can't open any files from {lh5_files}"
             raise RuntimeError(msg)
 
-        self.
+        self.i_start = i_start
+        self.n_entries = n_entries
         self.current_i_entry = 0
         self.next_i_entry = 0
 
@@ -317,14 +327,21 @@ class LH5Iterator(typing.Iterator):
             )
         return self.global_entry_list
 
-    def read(self, i_entry: int) ->
-        "
-
-
-
+    def read(self, i_entry: int, n_entries: int | None = None) -> LGDO:
+        "Read the nextlocal chunk of events, starting at entry."
+        self.lh5_buffer.resize(0)
+
+        if n_entries is None:
+            n_entries = self.buffer_len
+        elif n_entries == 0:
+            return self.lh5_buffer
+        elif n_entries > self.buffer_len:
+            msg = "n_entries cannot be larger than buffer_len"
+            raise ValueError(msg)
 
         # if file hasn't been opened yet, search through files
         # sequentially until we find the right one
+        i_file = np.searchsorted(self.entry_map, i_entry, "right")
         if i_file < len(self.lh5_files) and self.entry_map[i_file] == np.iinfo("q").max:
             while i_file < len(self.lh5_files) and i_entry >= self._get_file_cumentries(
                 i_file
@@ -332,10 +349,10 @@ class LH5Iterator(typing.Iterator):
             i_file += 1
 
         if i_file == len(self.lh5_files):
-            return
+            return self.lh5_buffer
         local_i_entry = i_entry - self._get_file_cumentries(i_file - 1)
 
-        while self.
+        while len(self.lh5_buffer) < n_entries and i_file < len(self.file_map):
            # Loop through files
            local_idx = self.get_file_entrylist(i_file)
            if local_idx is not None and len(local_idx) == 0:
@@ -344,18 +361,17 @@ class LH5Iterator(typing.Iterator):
                continue
 
            i_local = local_i_entry if local_idx is None else local_idx[local_i_entry]
-           self.lh5_buffer
+           self.lh5_buffer = self.lh5_st.read(
                self.groups[i_file],
                self.lh5_files[i_file],
                start_row=i_local,
-               n_rows=
+               n_rows=n_entries - len(self.lh5_buffer),
                idx=local_idx,
                field_mask=self.field_mask,
                obj_buf=self.lh5_buffer,
-               obj_buf_start=self.
+               obj_buf_start=len(self.lh5_buffer),
            )
 
-           self.n_rows += n_rows
           i_file += 1
           local_i_entry = 0
 
@@ -364,7 +380,7 @@ class LH5Iterator(typing.Iterator):
        if self.friend is not None:
            self.friend.read(i_entry)
 
-       return
+       return self.lh5_buffer
 
    def reset_field_mask(self, mask):
        """Replaces the field mask of this iterator and any friends with mask"""
@@ -375,7 +391,7 @@ class LH5Iterator(typing.Iterator):
    @property
    def current_local_entries(self) -> NDArray[int]:
        """Return list of local file entries in buffer"""
-       cur_entries = np.zeros(self.
+       cur_entries = np.zeros(len(self.lh5_buffer), dtype="int32")
        i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
        file_start = self._get_file_cumentries(i_file - 1)
        i_local = self.current_i_entry - file_start
@@ -402,7 +418,7 @@ class LH5Iterator(typing.Iterator):
    @property
    def current_global_entries(self) -> NDArray[int]:
        """Return list of local file entries in buffer"""
-       cur_entries = np.zeros(self.
+       cur_entries = np.zeros(len(self.lh5_buffer), dtype="int32")
        i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
        file_start = self._get_file_cumentries(i_file - 1)
        i_local = self.current_i_entry - file_start
@@ -433,7 +449,7 @@ class LH5Iterator(typing.Iterator):
    @property
    def current_files(self) -> NDArray[str]:
        """Return list of file names for entries in buffer"""
-       cur_files = np.zeros(self.
+       cur_files = np.zeros(len(self.lh5_buffer), dtype=object)
        i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
        file_start = self._get_file_cumentries(i_file - 1)
        i_local = self.current_i_entry - file_start
@@ -455,7 +471,7 @@ class LH5Iterator(typing.Iterator):
    @property
    def current_groups(self) -> NDArray[str]:
        """Return list of group names for entries in buffer"""
-       cur_groups = np.zeros(self.
+       cur_groups = np.zeros(len(self.lh5_buffer), dtype=object)
        i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
        file_start = self._get_file_cumentries(i_file - 1)
        i_local = self.current_i_entry - file_start
@@ -485,14 +501,19 @@ class LH5Iterator(typing.Iterator):
    def __iter__(self) -> typing.Iterator:
        """Loop through entries in blocks of size buffer_len."""
        self.current_i_entry = 0
-       self.next_i_entry =
+       self.next_i_entry = self.i_start
        return self
 
    def __next__(self) -> tuple[LGDO, int, int]:
-       """Read next buffer_len entries and return lh5_table
-
-
-
-
+       """Read next buffer_len entries and return lh5_table and iterator entry."""
+       n_entries = self.n_entries
+       if n_entries is not None:
+           n_entries = min(
+               self.buffer_len, n_entries + self.i_start - self.next_i_entry
+           )
+
+       buf = self.read(self.next_i_entry, n_entries)
+       if len(buf) == 0:
            raise StopIteration
-
+       self.next_i_entry = self.current_i_entry + len(buf)
+       return buf
lgdo/lh5/settings.py
ADDED
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+from typing import Any
+
+
+def default_hdf5_settings() -> dict[str, Any]:
+    """Returns the HDF5 settings for writing data to disk to the pydataobj defaults.
+
+    Examples
+    --------
+    >>> from lgdo import lh5
+    >>> lh5.DEFAULT_HDF5_SETTINGS["compression"] = "lzf"
+    >>> lh5.write(data, "data", "file.lh5")  # compressed with LZF
+    >>> lh5.DEFAULT_HDF5_SETTINGS = lh5.default_hdf5_settings()
+    >>> lh5.write(data, "data", "file.lh5", "of")  # compressed with default settings (GZIP)
+    """
+
+    return {
+        "shuffle": True,
+        "compression": "gzip",
+    }
+
+
+DEFAULT_HDF5_SETTINGS: dict[str, ...] = default_hdf5_settings()
+"""Global dictionary storing the default HDF5 settings for writing data to disk.
+
+Modify this global variable before writing data to disk with this package.
+
+Examples
+--------
+>>> from lgdo import lh5
+>>> lh5.DEFAULT_HDF5_SETTINGS["compression"] = "lzf"
+>>> lh5.write(data, "data", "file.lh5")  # compressed with LZF
+"""