legend-pydataobj 1.11.13__tar.gz → 1.12.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/PKG-INFO +1 -1
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/pyproject.toml +2 -2
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/legend_pydataobj.egg-info/PKG-INFO +1 -1
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/legend_pydataobj.egg-info/SOURCES.txt +0 -1
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/__init__.py +5 -4
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/_version.py +2 -2
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/__init__.py +1 -3
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/_serializers/read/composite.py +1 -3
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/_serializers/write/composite.py +2 -2
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/concat.py +3 -9
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/core.py +33 -32
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/iterator.py +48 -27
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/store.py +22 -75
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/tools.py +0 -111
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/utils.py +6 -4
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/types/array.py +84 -15
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/types/encoded.py +25 -20
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/types/histogram.py +1 -1
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/types/lgdo.py +50 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/types/table.py +50 -28
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/types/vectorofvectors.py +132 -94
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/types/vovutils.py +14 -4
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/types/waveformtable.py +19 -21
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/compression/conftest.py +1 -2
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/compression/test_radware_sigcompress.py +3 -3
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/conftest.py +3 -3
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/lh5/conftest.py +1 -1
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/lh5/test_concat.py +10 -10
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/lh5/test_core.py +47 -1
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/lh5/test_lh5_iterator.py +48 -15
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/lh5/test_lh5_store.py +133 -100
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/lh5/test_lh5_utils.py +9 -1
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/lh5/test_lh5_write.py +36 -40
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/types/test_array.py +29 -1
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/types/test_histogram.py +1 -1
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/types/test_table.py +59 -16
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/types/test_vectorofvectors.py +121 -27
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/types/test_vovutils.py +52 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/types/test_waveformtable.py +13 -0
- legend_pydataobj-1.11.13/src/lgdo/lh5_store.py +0 -284
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/LICENSE +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/README.md +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/setup.cfg +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/legend_pydataobj.egg-info/dependency_links.txt +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/legend_pydataobj.egg-info/entry_points.txt +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/legend_pydataobj.egg-info/not-zip-safe +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/legend_pydataobj.egg-info/requires.txt +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/legend_pydataobj.egg-info/top_level.txt +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/cli.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/compression/__init__.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/compression/base.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/compression/generic.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/compression/radware.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/compression/utils.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/compression/varlen.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lgdo_utils.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/_serializers/__init__.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/_serializers/read/__init__.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/_serializers/read/array.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/_serializers/read/encoded.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/_serializers/read/ndarray.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/_serializers/read/scalar.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/_serializers/read/utils.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/_serializers/read/vector_of_vectors.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/_serializers/write/__init__.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/_serializers/write/array.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/_serializers/write/scalar.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/_serializers/write/vector_of_vectors.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/datatype.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/exceptions.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/settings.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/logging.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/types/__init__.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/types/arrayofequalsizedarrays.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/types/fixedsizearray.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/types/scalar.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/types/struct.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/units.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/utils.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/compression/sigcompress/LDQTA_r117_20200110T105115Z_cal_geds_raw-0.dat +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/compression/sigcompress/special-wf-clipped.dat +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/compression/test_compression.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/compression/test_str2wfcodec.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/compression/test_uleb128_zigzag_diff.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/lh5/test_exceptions.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/lh5/test_lh5_datatype.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/lh5/test_lh5_tools.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/test_cli.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/test_lgdo_utils.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/types/test_arrayofequalsizedarrays.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/types/test_encoded.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/types/test_fixedsizearray.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/types/test_representations.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/types/test_scalar.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/types/test_struct.py +0 -0
- {legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/tests/types/test_table_eval.py +0 -0
{legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/pyproject.toml
@@ -122,7 +122,7 @@ extend-select = [
   "PIE", # flake8-pie
   "PL",  # pylint
   "PT",  # flake8-pytest-style
-
+  "PTH", # flake8-use-pathlib
   "RET", # flake8-return
   "RUF", # Ruff-specific
   "SIM", # flake8-simplify
@@ -167,7 +167,7 @@ minversion = "6.0"
 addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"]
 xfail_strict = true
 filterwarnings = ["error", 'ignore:\nPyarrow:DeprecationWarning']
-log_cli_level = "
+log_cli_level = "INFO"
 testpaths = "tests"

 [tool.codespell]
{legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/__init__.py
@@ -45,7 +45,7 @@ browsed easily in python like any `HDF5 <https://www.hdfgroup.org>`_ file using
 from __future__ import annotations

 from ._version import version as __version__
-from .
+from .lh5 import LH5Iterator, ls, read, read_as, read_n_rows, show, write
 from .types import (
     LGDO,
     Array,
@@ -69,7 +69,6 @@ __all__ = [
     "FixedSizeArray",
     "Histogram",
     "LH5Iterator",
-    "LH5Store",
     "Scalar",
     "Struct",
     "Table",
@@ -77,8 +76,10 @@ __all__ = [
     "VectorOfVectors",
     "WaveformTable",
     "__version__",
-    "load_dfs",
-    "load_nda",
     "ls",
+    "read",
+    "read_as",
+    "read_n_rows",
     "show",
+    "write",
 ]
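These `__init__.py` hunks reshape the public namespace: `read`, `read_as`, `read_n_rows` and `write` are now re-exported at the top level, while `LH5Store`, `load_dfs` and `load_nda` leave `lgdo.__all__`. A minimal sketch of the resulting call pattern (the object name "geds/raw" and the file names are hypothetical placeholders):

    import lgdo

    # functional API, no LH5Store instance needed
    obj = lgdo.read("geds/raw", "data.lh5")
    lgdo.write(obj, "geds/raw", "copy.lh5", wo_mode="of")
    n = lgdo.read_n_rows("geds/raw", "data.lh5")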
{legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/__init__.py
@@ -10,7 +10,7 @@ from __future__ import annotations
 from .core import read, read_as, write
 from .iterator import LH5Iterator
 from .store import LH5Store
-from .tools import
+from .tools import ls, show
 from .utils import read_n_rows

 __all__ = [
@@ -18,8 +18,6 @@ __all__ = [
     "LH5Store",
     "concat",
     "default_hdf5_settings",
-    "load_dfs",
-    "load_nda",
     "ls",
     "read",
     "read_as",
{legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/_serializers/read/composite.py
@@ -353,15 +353,13 @@ def _h5_read_table(
         table = Table(col_dict=col_dict, attrs=attrs)

         # set (write) loc to end of tree
-        table.
+        table.resize(do_warn=True)
         return table, n_rows_read

     # We have read all fields into the object buffer. Run
     # checks: All columns should be the same size. So update
     # table's size as necessary, warn if any mismatches are found
     obj_buf.resize(do_warn=True)
-    # set (write) loc to end of tree
-    obj_buf.loc = obj_buf_start + n_rows_read

     # check attributes
     utils.check_obj_buf_attrs(obj_buf.attrs, attrs, fname, oname)
{legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/_serializers/write/composite.py
@@ -1,8 +1,8 @@
 from __future__ import annotations

 import logging
-import os
 from inspect import signature
+from pathlib import Path

 import h5py

@@ -53,7 +53,7 @@ def _h5_write_lgdo(
     # change any object in the file. So we use file:append for
     # write_object:overwrite.
     if not isinstance(lh5_file, h5py.File):
-        mode = "w" if wo_mode == "of" or not
+        mode = "w" if wo_mode == "of" or not Path(lh5_file).exists() else "a"
         lh5_file = h5py.File(lh5_file, mode=mode, **file_kwargs)

     log.debug(
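The pathlib migration above preserves the mode-selection semantics: truncate only when overwriting the whole file or when it does not exist yet, append otherwise. An equivalent standalone sketch of that one-liner:

    from pathlib import Path

    def h5_open_mode(lh5_file: str, wo_mode: str) -> str:
        # "w" recreates the file from scratch; "a" keeps other objects
        # already stored in it ("of" is the overwrite_file write mode)
        return "w" if wo_mode == "of" or not Path(lh5_file).exists() else "a"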
{legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/concat.py
@@ -76,7 +76,7 @@ def _get_lgdos(file, obj_list):
             continue

         # read as little as possible
-        obj
+        obj = store.read(current, h5f0, n_rows=1)
         if isinstance(obj, (Table, Array, VectorOfVectors)):
             lgdos.append(current)

@@ -139,12 +139,6 @@ def _remove_nested_fields(lgdos: dict, obj_list: list):
             _inplace_table_filter(key, val, obj_list)


-def _slice(obj, n_rows):
-    ak_obj = obj.view_as("ak")[:n_rows]
-    obj_type = type(obj)
-    return obj_type(ak_obj)
-
-
 def lh5concat(
     lh5_files: list,
     output: str,
@@ -186,8 +180,8 @@ def lh5concat(
     # loop over lgdo objects
     for lgdo in lgdos:
         # iterate over the files
-        for lh5_obj
-        data = {lgdo:
+        for lh5_obj in LH5Iterator(lh5_files, lgdo):
+            data = {lgdo: lh5_obj}

             # remove the nested fields
             _remove_nested_fields(data, obj_list)
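With the `_slice` helper gone, `lh5concat` now chunks each object across the input files through `LH5Iterator` instead of slicing awkward-array views. A hedged usage sketch (file names are placeholders; further keyword arguments of `lh5concat` are omitted here):

    from lgdo.lh5.concat import lh5concat

    # concatenate every concatenable object (Table, Array, VectorOfVectors)
    # found in the inputs into a single output file
    lh5concat(["run0.lh5", "run1.lh5"], output="merged.lh5")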
{legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/core.py
@@ -4,6 +4,8 @@ import bisect
 import inspect
 import sys
 from collections.abc import Mapping, Sequence
+from contextlib import suppress
+from pathlib import Path
 from typing import Any

 import h5py
@@ -92,8 +94,7 @@ def read(
        will be set to ``True``, while the rest will default to ``False``.
    obj_buf
        Read directly into memory provided in `obj_buf`. Note: the buffer
-        will be
-        buffer length, send in ``n_rows = len(obj_buf)``.
+        will be resized to accommodate the data retrieved.
    obj_buf_start
        Start location in ``obj_buf`` for read. For concatenating data to
        array-like objects.
@@ -106,25 +107,25 @@ def read(

    Returns
    -------
-
-
-        successfully read out. Essential for arrays when the amount of data
-        is smaller than the object buffer. For scalars and structs
-        `n_rows_read` will be ``1``. For tables it is redundant with
-        ``table.loc``. If `obj_buf` is ``None``, only `object` is returned.
+    object
+        the read-out object
    """
    if isinstance(lh5_file, h5py.File):
        lh5_obj = lh5_file[name]
    elif isinstance(lh5_file, str):
        lh5_file = h5py.File(lh5_file, mode="r", locking=locking)
-
+        try:
+            lh5_obj = lh5_file[name]
+        except KeyError as ke:
+            err = f"Object {name} not found in file {lh5_file.filename}"
+            raise KeyError(err) from ke
    else:
-
-
-
-
+        if obj_buf is not None:
+            obj_buf.resize(obj_buf_start)
+        else:
+            obj_buf_start = 0

-        for i, h5f in enumerate(
+        for i, h5f in enumerate(lh5_file):
            if (
                isinstance(idx, (list, tuple))
                and len(idx) > 0
@@ -146,33 +147,26 @@ def read(
                idx = np.array(idx[0])[n_rows_to_read_i:] - n_rows_i
            else:
                idx_i = None
-                n_rows_i = n_rows - n_rows_read

-
+            obj_buf_start_i = len(obj_buf) if obj_buf else 0
+            n_rows_i = n_rows - (obj_buf_start_i - obj_buf_start)
+
+            obj_buf = read(
                name,
                h5f,
-                start_row,
+                start_row if i == 0 else 0,
                n_rows_i,
                idx_i,
                use_h5idx,
                field_mask,
                obj_buf,
-
+                obj_buf_start_i,
                decompress,
            )
-            if isinstance(obj_ret, tuple):
-                obj_buf, n_rows_read_i = obj_ret
-                obj_buf_is_new = True
-            else:
-                obj_buf = obj_ret
-                n_rows_read_i = len(obj_buf)

-
-
-
-            start_row = 0
-            obj_buf_start += n_rows_read_i
-        return obj_buf if obj_buf_is_new else (obj_buf, n_rows_read)
+            if obj_buf is None or (len(obj_buf) - obj_buf_start) >= n_rows:
+                return obj_buf
+        return obj_buf

    if isinstance(idx, (list, tuple)) and len(idx) > 0 and not np.isscalar(idx[0]):
        idx = idx[0]
@@ -192,8 +186,10 @@ def read(
        obj_buf_start=obj_buf_start,
        decompress=decompress,
    )
+    with suppress(AttributeError):
+        obj.resize(obj_buf_start + n_rows_read)

-    return obj
+    return obj


 def write(
@@ -295,7 +291,12 @@ def write(
        datasets. **Note: `compression` Ignored if compression is specified
        as an `obj` attribute.**
    """
-
+
+    if (
+        isinstance(lh5_file, str)
+        and not Path(lh5_file).is_file()
+        and wo_mode in ("w", "write_safe", "of", "overwrite_file")
+    ):
        h5py_kwargs.update(
            {
                "fs_strategy": "page",
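Two user-visible changes land in `core.py`: `read` now always returns the LGDO itself (resizing a supplied buffer in place via the `suppress(AttributeError)` block) instead of sometimes returning an `(object, n_rows_read)` tuple, and brand-new files written with a truncating `wo_mode` are created with HDF5 paged file-space allocation. A sketch of the new read contract (group and file names are placeholders):

    from lgdo import lh5

    obj = lh5.read("geds/raw", "run0.lh5")  # plain read, returns the LGDO

    # concatenating read: append run1 into the same buffer; the return value
    # is the buffer itself, resized to the total number of rows read
    obj = lh5.read("geds/raw", "run1.lh5", obj_buf=obj, obj_buf_start=len(obj))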
{legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/iterator.py
@@ -24,7 +24,8 @@ class LH5Iterator(typing.Iterator):

    This can be used as an iterator:

-
+
+    >>> for lh5_obj in LH5Iterator(...):
    >>>     # do the thing!

    This is intended for if you are reading a large quantity of data. This
@@ -42,6 +43,8 @@ class LH5Iterator(typing.Iterator):
    In addition to accessing requested data via ``lh5_obj``, several
    properties exist to tell you where that data came from:

+    - lh5_it.current_i_entry: get the index within the entry list of the
+      first entry that is currently read
    - lh5_it.current_local_entries: get the entry numbers relative to the
      file the data came from
    - lh5_it.current_global_entries: get the entry number relative to the
@@ -49,9 +52,9 @@ class LH5Iterator(typing.Iterator):
    - lh5_it.current_files: get the file name corresponding to each entry
    - lh5_it.current_groups: get the group name corresponding to each entry

-    This class can also be used
+    This class can also be used for random access:

-    >>> lh5_obj
+    >>> lh5_obj = lh5_it.read(i_entry)

    to read the block of entries starting at i_entry. In case of multiple files
    or the use of an event selection, i_entry refers to a global event index
@@ -65,6 +68,8 @@ class LH5Iterator(typing.Iterator):
        base_path: str = "",
        entry_list: list[int] | list[list[int]] | None = None,
        entry_mask: list[bool] | list[list[bool]] | None = None,
+        i_start: int = 0,
+        n_entries: int | None = None,
        field_mask: dict[str, bool] | list[str] | tuple[str] | None = None,
        buffer_len: int = "100*MB",
        file_cache: int = 10,
@@ -89,6 +94,10 @@ class LH5Iterator(typing.Iterator):
        entry_mask
            mask of entries to read. If a list of arrays is provided, expect
            one for each file. Ignore if a selection list is provided.
+        i_start
+            index of first entry to start at when iterating
+        n_entries
+            number of entries to read before terminating iteration
        field_mask
            mask of which fields to read. See :meth:`LH5Store.read` for
            more details.
@@ -183,7 +192,8 @@ class LH5Iterator(typing.Iterator):
            msg = f"can't open any files from {lh5_files}"
            raise RuntimeError(msg)

-        self.
+        self.i_start = i_start
+        self.n_entries = n_entries
        self.current_i_entry = 0
        self.next_i_entry = 0

@@ -317,14 +327,21 @@ class LH5Iterator(typing.Iterator):
        )
        return self.global_entry_list

-    def read(self, i_entry: int) ->
-        "
-
-
-
+    def read(self, i_entry: int, n_entries: int | None = None) -> LGDO:
+        "Read the next local chunk of events, starting at i_entry."
+        self.lh5_buffer.resize(0)
+
+        if n_entries is None:
+            n_entries = self.buffer_len
+        elif n_entries == 0:
+            return self.lh5_buffer
+        elif n_entries > self.buffer_len:
+            msg = "n_entries cannot be larger than buffer_len"
+            raise ValueError(msg)

        # if file hasn't been opened yet, search through files
        # sequentially until we find the right one
+        i_file = np.searchsorted(self.entry_map, i_entry, "right")
        if i_file < len(self.lh5_files) and self.entry_map[i_file] == np.iinfo("q").max:
            while i_file < len(self.lh5_files) and i_entry >= self._get_file_cumentries(
                i_file
@@ -332,10 +349,10 @@ class LH5Iterator(typing.Iterator):
                i_file += 1

        if i_file == len(self.lh5_files):
-            return
+            return self.lh5_buffer
        local_i_entry = i_entry - self._get_file_cumentries(i_file - 1)

-        while self.
+        while len(self.lh5_buffer) < n_entries and i_file < len(self.file_map):
            # Loop through files
            local_idx = self.get_file_entrylist(i_file)
            if local_idx is not None and len(local_idx) == 0:
@@ -344,18 +361,17 @@ class LH5Iterator(typing.Iterator):
                continue

            i_local = local_i_entry if local_idx is None else local_idx[local_i_entry]
-            self.lh5_buffer
+            self.lh5_buffer = self.lh5_st.read(
                self.groups[i_file],
                self.lh5_files[i_file],
                start_row=i_local,
-                n_rows=
+                n_rows=n_entries - len(self.lh5_buffer),
                idx=local_idx,
                field_mask=self.field_mask,
                obj_buf=self.lh5_buffer,
-                obj_buf_start=self.
+                obj_buf_start=len(self.lh5_buffer),
            )

-            self.n_rows += n_rows
            i_file += 1
            local_i_entry = 0

@@ -364,7 +380,7 @@ class LH5Iterator(typing.Iterator):
        if self.friend is not None:
            self.friend.read(i_entry)

-        return
+        return self.lh5_buffer

    def reset_field_mask(self, mask):
        """Replaces the field mask of this iterator and any friends with mask"""
@@ -375,7 +391,7 @@ class LH5Iterator(typing.Iterator):
    @property
    def current_local_entries(self) -> NDArray[int]:
        """Return list of local file entries in buffer"""
-        cur_entries = np.zeros(self.
+        cur_entries = np.zeros(len(self.lh5_buffer), dtype="int32")
        i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
        file_start = self._get_file_cumentries(i_file - 1)
        i_local = self.current_i_entry - file_start
@@ -402,7 +418,7 @@ class LH5Iterator(typing.Iterator):
    @property
    def current_global_entries(self) -> NDArray[int]:
        """Return list of local file entries in buffer"""
-        cur_entries = np.zeros(self.
+        cur_entries = np.zeros(len(self.lh5_buffer), dtype="int32")
        i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
        file_start = self._get_file_cumentries(i_file - 1)
        i_local = self.current_i_entry - file_start
@@ -433,7 +449,7 @@ class LH5Iterator(typing.Iterator):
    @property
    def current_files(self) -> NDArray[str]:
        """Return list of file names for entries in buffer"""
-        cur_files = np.zeros(self.
+        cur_files = np.zeros(len(self.lh5_buffer), dtype=object)
        i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
        file_start = self._get_file_cumentries(i_file - 1)
        i_local = self.current_i_entry - file_start
@@ -455,7 +471,7 @@ class LH5Iterator(typing.Iterator):
    @property
    def current_groups(self) -> NDArray[str]:
        """Return list of group names for entries in buffer"""
-        cur_groups = np.zeros(self.
+        cur_groups = np.zeros(len(self.lh5_buffer), dtype=object)
        i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
        file_start = self._get_file_cumentries(i_file - 1)
        i_local = self.current_i_entry - file_start
@@ -485,14 +501,19 @@ class LH5Iterator(typing.Iterator):
    def __iter__(self) -> typing.Iterator:
        """Loop through entries in blocks of size buffer_len."""
        self.current_i_entry = 0
-        self.next_i_entry =
+        self.next_i_entry = self.i_start
        return self

    def __next__(self) -> tuple[LGDO, int, int]:
-        """Read next buffer_len entries and return lh5_table
-
-
-
+        """Read next buffer_len entries and return lh5_table and iterator entry."""
+        n_entries = self.n_entries
+        if n_entries is not None:
+            n_entries = min(
+                self.buffer_len, n_entries + self.i_start - self.next_i_entry
+            )
+
+        buf = self.read(self.next_i_entry, n_entries)
+        if len(buf) == 0:
            raise StopIteration
-
+        self.next_i_entry = self.current_i_entry + len(buf)
+        return buf
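The iterator hunks add windowed iteration (`i_start`, `n_entries`) and make both `read` and `__next__` hand back the internal buffer, whose length now tracks the rows actually read. A hedged sketch (file and group names are placeholders):

    from lgdo.lh5 import LH5Iterator

    # iterate over 5000 entries starting at global entry 1000
    it = LH5Iterator(
        ["run0.lh5", "run1.lh5"], "geds/raw", i_start=1000, n_entries=5000
    )
    for lh5_obj in it:
        ...  # each chunk holds at most buffer_len entries

    chunk = it.read(2500)  # random access: buffer filled from entry 2500 on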
{legend_pydataobj-1.11.13 → legend_pydataobj-1.12.0}/src/lgdo/lh5/store.py
@@ -5,21 +5,20 @@ HDF5 files.

 from __future__ import annotations

-import bisect
 import logging
-import os
 import sys
 from collections import OrderedDict
 from collections.abc import Mapping, Sequence
 from inspect import signature
+from pathlib import Path
 from typing import Any

 import h5py
-import numpy as np
 from numpy.typing import ArrayLike

 from .. import types
 from . import _serializers, utils
+from .core import read

 log = logging.getLogger(__name__)

@@ -93,16 +92,16 @@ class LH5Store:
            return self.files[lh5_file]

        if self.base_path != "":
-            full_path =
+            full_path = Path(self.base_path) / lh5_file
        else:
-            full_path = lh5_file
+            full_path = Path(lh5_file)

-        file_exists =
+        file_exists = full_path.exists()
        if mode != "r":
-            directory =
-            if directory != "" and not
+            directory = full_path.parent
+            if directory != "" and not full_path.parent.exists():
                log.debug(f"making path {directory}")
-
+                directory.mkdir(parents=True, exist_ok=True)

        if mode == "r" and not file_exists:
            msg = f"file {full_path} not found"
@@ -155,7 +154,7 @@ class LH5Store:
        """Returns an LH5 object appropriate for use as a pre-allocated buffer
        in a read loop. Sets size to `size` if object has a size.
        """
-        obj
+        obj = self.read(name, lh5_file, n_rows=0, field_mask=field_mask)
        if hasattr(obj, "resize") and size is not None:
            obj.resize(new_size=size)
        return obj
@@ -182,72 +181,20 @@ class LH5Store:
        """
        # grab files from store
        if isinstance(lh5_file, (str, h5py.File)):
-
+            h5f = self.gimme_file(lh5_file, "r", **file_kwargs)
        else:
-
-
-
-
-
-
-
-
-
-
-
-
-                    # make idx a proper tuple if it's not one already
-                    if not (isinstance(idx, tuple) and len(idx) == 1):
-                        idx = (idx,)
-                    # idx is a long continuous array
-                    n_rows_i = utils.read_n_rows(name, h5f)
-                    # find the length of the subset of idx that contains indices
-                    # that are less than n_rows_i
-                    n_rows_to_read_i = bisect.bisect_left(idx[0], n_rows_i)
-                    # now split idx into idx_i and the remainder
-                    idx_i = np.array(idx[0])[:n_rows_to_read_i]
-                    idx = np.array(idx[0])[n_rows_to_read_i:] - n_rows_i
-                else:
-                    idx_i = None
-                    n_rows_i = n_rows - n_rows_read
-
-                obj_buf, n_rows_read_i = self.read(
-                    name,
-                    h5f,
-                    start_row,
-                    n_rows_i,
-                    idx_i,
-                    use_h5idx,
-                    field_mask,
-                    obj_buf,
-                    obj_buf_start,
-                    decompress,
-                )
-
-                n_rows_read += n_rows_read_i
-                if n_rows_read >= n_rows or obj_buf is None:
-                    return obj_buf, n_rows_read
-                start_row = 0
-                obj_buf_start += n_rows_read_i
-            return obj_buf, n_rows_read
-
-        if isinstance(idx, (list, tuple)) and len(idx) > 0 and not np.isscalar(idx[0]):
-            idx = idx[0]
-        if isinstance(idx, np.ndarray) and idx.dtype == np.dtype("?"):
-            idx = np.where(idx)[0]
-
-        return _serializers._h5_read_lgdo(
-            lh5_obj.id,
-            lh5_obj.file.filename,
-            lh5_obj.name,
-            start_row=start_row,
-            n_rows=n_rows,
-            idx=idx,
-            use_h5idx=use_h5idx,
-            field_mask=field_mask,
-            obj_buf=obj_buf,
-            obj_buf_start=obj_buf_start,
-            decompress=decompress,
+            h5f = [self.gimme_file(f, "r", **file_kwargs) for f in lh5_file]
+        return read(
+            name,
+            h5f,
+            start_row,
+            n_rows,
+            idx,
+            use_h5idx,
+            field_mask,
+            obj_buf,
+            obj_buf_start,
+            decompress,
        )

    def write(