legend-pydataobj 1.9.0__py3-none-any.whl → 1.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.1.dist-info}/METADATA +2 -2
- legend_pydataobj-1.10.1.dist-info/RECORD +55 -0
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.1.dist-info}/WHEEL +1 -1
- lgdo/_version.py +2 -2
- lgdo/compression/radware.py +8 -16
- lgdo/lh5/_serializers/read/array.py +9 -9
- lgdo/lh5/_serializers/read/composite.py +67 -78
- lgdo/lh5/_serializers/read/encoded.py +31 -9
- lgdo/lh5/_serializers/read/ndarray.py +55 -42
- lgdo/lh5/_serializers/read/scalar.py +10 -3
- lgdo/lh5/_serializers/read/utils.py +165 -3
- lgdo/lh5/_serializers/read/vector_of_vectors.py +36 -14
- lgdo/lh5/_serializers/write/array.py +6 -1
- lgdo/lh5/_serializers/write/composite.py +14 -5
- lgdo/lh5/_serializers/write/scalar.py +6 -1
- lgdo/lh5/core.py +81 -7
- lgdo/lh5/exceptions.py +3 -3
- lgdo/lh5/iterator.py +258 -74
- lgdo/lh5/store.py +116 -12
- lgdo/lh5/tools.py +1 -1
- lgdo/lh5/utils.py +29 -44
- lgdo/types/histogram.py +122 -6
- lgdo/types/table.py +2 -2
- lgdo/types/vectorofvectors.py +1 -1
- legend_pydataobj-1.9.0.dist-info/RECORD +0 -55
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.1.dist-info}/LICENSE +0 -0
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.1.dist-info}/entry_points.txt +0 -0
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.1.dist-info}/top_level.txt +0 -0
lgdo/lh5/core.py
CHANGED
@@ -1,15 +1,18 @@
 from __future__ import annotations

+import bisect
 import inspect
 import sys
 from collections.abc import Mapping, Sequence
 from typing import Any

 import h5py
+import numpy as np
 from numpy.typing import ArrayLike

 from .. import types
 from . import _serializers
+from .utils import read_n_rows


 def read(
@@ -23,6 +26,7 @@ def read(
     obj_buf: types.LGDO = None,
     obj_buf_start: int = 0,
     decompress: bool = True,
+    locking: bool = False,
 ) -> types.LGDO | tuple[types.LGDO, int]:
     """Read LH5 object data from a file.

@@ -97,6 +101,8 @@ def read(
         Decompress data encoded with LGDO's compression routines right
         after reading. The option has no effect on data encoded with HDF5
         built-in filters, which is always decompressed upstream by HDF5.
+    locking
+        Lock HDF5 file while reading.

     Returns
     -------
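The new `locking` flag is forwarded to `h5py.File` when `lh5_file` is given as a path (see the hunk below). A minimal usage sketch, with made-up file and object names:

from lgdo import lh5

# "data.lh5" and the object name are hypothetical; locking=True opts
# back into HDF5 file locking (the new default is False, i.e. the
# file is opened for reading without acquiring the HDF5 lock)
wf = lh5.read("geds/raw/waveform", "data.lh5", locking=True)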
@@ -110,17 +116,72 @@ def read(
     if isinstance(lh5_file, h5py.File):
         lh5_obj = lh5_file[name]
     elif isinstance(lh5_file, str):
-        lh5_file = h5py.File(lh5_file, mode="r")
+        lh5_file = h5py.File(lh5_file, mode="r", locking=locking)
         lh5_obj = lh5_file[name]
     else:
-
-
-
-
-
+        lh5_files = list(lh5_file)
+        n_rows_read = 0
+        obj_buf_is_new = False
+
+        for i, h5f in enumerate(lh5_files):
+            if (
+                isinstance(idx, (list, tuple))
+                and len(idx) > 0
+                and not np.isscalar(idx[0])
+            ):
+                # a list of lists: must be one per file
+                idx_i = idx[i]
+            elif idx is not None:
+                # make idx a proper tuple if it's not one already
+                if not (isinstance(idx, tuple) and len(idx) == 1):
+                    idx = (idx,)
+                # idx is a long continuous array
+                n_rows_i = read_n_rows(name, h5f)
+                # find the length of the subset of idx that contains indices
+                # that are less than n_rows_i
+                n_rows_to_read_i = bisect.bisect_left(idx[0], n_rows_i)
+                # now split idx into idx_i and the remainder
+                idx_i = np.array(idx[0])[:n_rows_to_read_i]
+                idx = np.array(idx[0])[n_rows_to_read_i:] - n_rows_i
+            else:
+                idx_i = None
+            n_rows_i = n_rows - n_rows_read
+
+            obj_ret = read(
+                name,
+                h5f,
+                start_row,
+                n_rows_i,
+                idx_i,
+                use_h5idx,
+                field_mask,
+                obj_buf,
+                obj_buf_start,
+                decompress,
+            )
+            if isinstance(obj_ret, tuple):
+                obj_buf, n_rows_read_i = obj_ret
+                obj_buf_is_new = True
+            else:
+                obj_buf = obj_ret
+                n_rows_read_i = len(obj_buf)
+
+            n_rows_read += n_rows_read_i
+            if n_rows_read >= n_rows or obj_buf is None:
+                return obj_buf, n_rows_read
+            start_row = 0
+            obj_buf_start += n_rows_read_i
+        return obj_buf if obj_buf_is_new else (obj_buf, n_rows_read)
+
+    if isinstance(idx, (list, tuple)) and len(idx) > 0 and not np.isscalar(idx[0]):
+        idx = idx[0]
+    if isinstance(idx, np.ndarray) and idx.dtype == np.dtype("?"):
+        idx = np.where(idx)[0]

     obj, n_rows_read = _serializers._h5_read_lgdo(
-        lh5_obj,
+        lh5_obj.id,
+        lh5_obj.file.filename,
+        lh5_obj.name,
         start_row=start_row,
         n_rows=n_rows,
         idx=idx,
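The flat-`idx` branch above relies on the global index list being sorted: `bisect.bisect_left` counts how many entries land in the current file, and the remainder is shifted down by that file's row count before moving on. A self-contained sketch of the same splitting, with made-up row counts:

import bisect

import numpy as np

idx = np.array([2, 5, 99, 120, 130])  # sorted global row indices (made up)
for n_rows_i in (100, 50):  # rows per file (made up)
    # entries smaller than n_rows_i belong to the current file
    n = bisect.bisect_left(idx, n_rows_i)
    idx_i, idx = idx[:n], idx[n:] - n_rows_i
    print(idx_i)  # [ 2  5 99] on the first pass, [20 30] on the second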
@@ -143,6 +204,7 @@ def write(
     n_rows: int | None = None,
     wo_mode: str = "append",
     write_start: int = 0,
+    page_buffer: int = 0,
     **h5py_kwargs,
 ) -> None:
     """Write an LGDO into an LH5 file.
@@ -218,6 +280,11 @@ def write(
     write_start
         row in the output file (if already existing) to start overwriting
         from.
+    page_buffer
+        enable paged aggregation with a buffer of this size in bytes.
+        Only used when creating a new file. Useful when writing a file
+        with a large number of small datasets. This is a shorthand for
+        ``fs_strategy="page", fs_page_size=page_buffer``.
     **h5py_kwargs
         additional keyword arguments forwarded to
         :meth:`h5py.Group.create_dataset` to specify, for example, an HDF5
@@ -225,6 +292,13 @@ def write(
         datasets. **Note: `compression` Ignored if compression is specified
         as an `obj` attribute.**
     """
+    if wo_mode in ("w", "write", "of", "overwrite_file"):
+        h5py_kwargs.update(
+            {
+                "fs_strategy": "page",
+                "fs_page_size": page_buffer,
+            }
+        )
     return _serializers._h5_write_lgdo(
         obj,
         name,
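The `page_buffer` option is a shorthand for HDF5's paged file-space strategy (h5py's `fs_strategy` and `fs_page_size` file-creation keywords). A usage sketch, with a made-up table and output path:

import numpy as np

from lgdo import lh5, types

tbl = types.Table(col_dict={"energy": types.Array(np.zeros(1000))})
# create the file with paged aggregation and 64 KiB pages, so many
# small datasets coalesce into fewer large I/O requests
lh5.write(tbl, "tbl", "out.lh5", wo_mode="overwrite_file", page_buffer=65536)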
lgdo/lh5/exceptions.py
CHANGED
@@ -4,11 +4,11 @@ import h5py


 class LH5DecodeError(Exception):
-    def __init__(self, message: str,
+    def __init__(self, message: str, fname: str, oname: str) -> None:
         super().__init__(message)

-        self.file =
-        self.obj =
+        self.file = fname
+        self.obj = oname

     def __str__(self) -> str:
         return (
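Since the file and object names are now passed in as plain strings, constructing the exception no longer requires a live h5py handle. A minimal sketch (names made up):

from lgdo.lh5.exceptions import LH5DecodeError

try:
    raise LH5DecodeError("unsupported datatype", "data.lh5", "/geds/raw")
except LH5DecodeError as e:
    print(e.file, e.obj)  # data.lh5 /geds/raw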