legend-pydataobj 1.9.0__py3-none-any.whl → 1.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.0.dist-info}/METADATA +2 -2
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.0.dist-info}/RECORD +23 -23
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.0.dist-info}/WHEEL +1 -1
- lgdo/_version.py +2 -2
- lgdo/lh5/_serializers/read/array.py +9 -9
- lgdo/lh5/_serializers/read/composite.py +68 -78
- lgdo/lh5/_serializers/read/encoded.py +31 -9
- lgdo/lh5/_serializers/read/ndarray.py +51 -37
- lgdo/lh5/_serializers/read/scalar.py +10 -3
- lgdo/lh5/_serializers/read/utils.py +26 -3
- lgdo/lh5/_serializers/read/vector_of_vectors.py +35 -13
- lgdo/lh5/_serializers/write/array.py +6 -1
- lgdo/lh5/_serializers/write/composite.py +14 -5
- lgdo/lh5/_serializers/write/scalar.py +6 -1
- lgdo/lh5/core.py +78 -7
- lgdo/lh5/exceptions.py +3 -3
- lgdo/lh5/store.py +101 -11
- lgdo/lh5/tools.py +1 -1
- lgdo/lh5/utils.py +13 -2
- lgdo/types/histogram.py +18 -3
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.0.dist-info}/LICENSE +0 -0
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.0.dist-info}/entry_points.txt +0 -0
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.0.dist-info}/top_level.txt +0 -0
lgdo/lh5/store.py
CHANGED
@@ -5,13 +5,16 @@ HDF5 files.
|
|
5
5
|
|
6
6
|
from __future__ import annotations
|
7
7
|
|
8
|
+
import bisect
|
8
9
|
import logging
|
9
10
|
import os
|
10
11
|
import sys
|
11
12
|
from collections.abc import Mapping, Sequence
|
13
|
+
from inspect import signature
|
12
14
|
from typing import Any
|
13
15
|
|
14
16
|
import h5py
|
17
|
+
import numpy as np
|
15
18
|
from numpy.typing import ArrayLike
|
16
19
|
|
17
20
|
from .. import types
|
@@ -34,7 +37,9 @@ class LH5Store:
|
|
34
37
|
lgdo.waveformtable.WaveformTable
|
35
38
|
"""
|
36
39
|
|
37
|
-
def __init__(
|
40
|
+
def __init__(
|
41
|
+
self, base_path: str = "", keep_open: bool = False, locking: bool = False
|
42
|
+
) -> None:
|
38
43
|
"""
|
39
44
|
Parameters
|
40
45
|
----------
|
@@ -43,12 +48,21 @@ class LH5Store:
|
|
43
48
|
keep_open
|
44
49
|
whether to keep files open by storing the :mod:`h5py` objects as
|
45
50
|
class attributes.
|
51
|
+
locking
|
52
|
+
whether to lock files when reading
|
46
53
|
"""
|
47
54
|
self.base_path = "" if base_path == "" else utils.expand_path(base_path)
|
48
55
|
self.keep_open = keep_open
|
56
|
+
self.locking = locking
|
49
57
|
self.files = {}
|
50
58
|
|
51
|
-
def gimme_file(
|
59
|
+
def gimme_file(
|
60
|
+
self,
|
61
|
+
lh5_file: str | h5py.File,
|
62
|
+
mode: str = "r",
|
63
|
+
page_buffer: int = 0,
|
64
|
+
**file_kwargs,
|
65
|
+
) -> h5py.File:
|
52
66
|
"""Returns a :mod:`h5py` file object from the store or creates a new one.
|
53
67
|
|
54
68
|
Parameters
|
@@ -57,12 +71,20 @@ class LH5Store:
|
|
57
71
|
LH5 file name.
|
58
72
|
mode
|
59
73
|
mode in which to open file. See :class:`h5py.File` documentation.
|
74
|
+
page_buffer
|
75
|
+
enable paged aggregation with a buffer of this size in bytes.
|
76
|
+
Only used when creating a new file. Useful when writing a file
|
77
|
+
with a large number of small datasets. This is a short-hand for
|
78
|
+
``(fs_strategy="page", fs_page_size=[page_buffer])``
|
79
|
+
file_kwargs
|
80
|
+
Keyword arguments for :class:`h5py.File`
|
60
81
|
"""
|
61
82
|
if isinstance(lh5_file, h5py.File):
|
62
83
|
return lh5_file
|
63
84
|
|
64
85
|
if mode == "r":
|
65
86
|
lh5_file = utils.expand_path(lh5_file, base_path=self.base_path)
|
87
|
+
file_kwargs["locking"] = self.locking
|
66
88
|
|
67
89
|
if lh5_file in self.files:
|
68
90
|
return self.files[lh5_file]
|
@@ -72,20 +94,30 @@ class LH5Store:
|
|
72
94
|
else:
|
73
95
|
full_path = lh5_file
|
74
96
|
|
97
|
+
file_exists = os.path.exists(full_path)
|
75
98
|
if mode != "r":
|
76
99
|
directory = os.path.dirname(full_path)
|
77
100
|
if directory != "" and not os.path.exists(directory):
|
78
101
|
log.debug(f"making path {directory}")
|
79
102
|
os.makedirs(directory)
|
80
103
|
|
81
|
-
if mode == "r" and not
|
104
|
+
if mode == "r" and not file_exists:
|
82
105
|
msg = f"file {full_path} not found"
|
83
106
|
raise FileNotFoundError(msg)
|
107
|
+
if not file_exists:
|
108
|
+
mode = "w"
|
84
109
|
|
85
|
-
if mode != "r" and
|
110
|
+
if mode != "r" and file_exists:
|
86
111
|
log.debug(f"opening existing file {full_path} in mode '{mode}'")
|
87
112
|
|
88
|
-
|
113
|
+
if mode == "w":
|
114
|
+
file_kwargs.update(
|
115
|
+
{
|
116
|
+
"fs_strategy": "page",
|
117
|
+
"fs_page_size": page_buffer,
|
118
|
+
}
|
119
|
+
)
|
120
|
+
h5f = h5py.File(full_path, mode, **file_kwargs)
|
89
121
|
|
90
122
|
if self.keep_open:
|
91
123
|
self.files[lh5_file] = h5f
|
@@ -135,6 +167,7 @@ class LH5Store:
|
|
135
167
|
obj_buf: types.LGDO = None,
|
136
168
|
obj_buf_start: int = 0,
|
137
169
|
decompress: bool = True,
|
170
|
+
**file_kwargs,
|
138
171
|
) -> tuple[types.LGDO, int]:
|
139
172
|
"""Read LH5 object data from a file in the store.
|
140
173
|
|
@@ -143,13 +176,62 @@ class LH5Store:
|
|
143
176
|
.lh5.core.read
|
144
177
|
"""
|
145
178
|
# grab files from store
|
146
|
-
if
|
147
|
-
lh5_obj =
|
179
|
+
if isinstance(lh5_file, (str, h5py.File)):
|
180
|
+
lh5_obj = self.gimme_file(lh5_file, "r", **file_kwargs)[name]
|
148
181
|
else:
|
149
|
-
|
150
|
-
|
182
|
+
lh5_files = list(lh5_file)
|
183
|
+
n_rows_read = 0
|
184
|
+
|
185
|
+
for i, h5f in enumerate(lh5_files):
|
186
|
+
if (
|
187
|
+
isinstance(idx, (list, tuple))
|
188
|
+
and len(idx) > 0
|
189
|
+
and not np.isscalar(idx[0])
|
190
|
+
):
|
191
|
+
# a list of lists: must be one per file
|
192
|
+
idx_i = idx[i]
|
193
|
+
elif idx is not None:
|
194
|
+
# make idx a proper tuple if it's not one already
|
195
|
+
if not (isinstance(idx, tuple) and len(idx) == 1):
|
196
|
+
idx = (idx,)
|
197
|
+
# idx is a long continuous array
|
198
|
+
n_rows_i = utils.read_n_rows(name, h5f)
|
199
|
+
# find the length of the subset of idx that contains indices
|
200
|
+
# that are less than n_rows_i
|
201
|
+
n_rows_to_read_i = bisect.bisect_left(idx[0], n_rows_i)
|
202
|
+
# now split idx into idx_i and the remainder
|
203
|
+
idx_i = np.array(idx[0])[:n_rows_to_read_i]
|
204
|
+
idx = np.array(idx[0])[n_rows_to_read_i:] - n_rows_i
|
205
|
+
else:
|
206
|
+
idx_i = None
|
207
|
+
n_rows_i = n_rows - n_rows_read
|
208
|
+
|
209
|
+
obj_buf, n_rows_read_i = self.read(
|
210
|
+
name,
|
211
|
+
h5f,
|
212
|
+
start_row,
|
213
|
+
n_rows_i,
|
214
|
+
idx_i,
|
215
|
+
use_h5idx,
|
216
|
+
field_mask,
|
217
|
+
obj_buf,
|
218
|
+
obj_buf_start,
|
219
|
+
decompress,
|
220
|
+
)
|
221
|
+
|
222
|
+
n_rows_read += n_rows_read_i
|
223
|
+
if n_rows_read >= n_rows or obj_buf is None:
|
224
|
+
return obj_buf, n_rows_read
|
225
|
+
start_row = 0
|
226
|
+
obj_buf_start += n_rows_read_i
|
227
|
+
return obj_buf, n_rows_read
|
228
|
+
|
229
|
+
if isinstance(idx, (list, tuple)) and len(idx) > 0 and not np.isscalar(idx[0]):
|
230
|
+
idx = idx[0]
|
151
231
|
return _serializers._h5_read_lgdo(
|
152
|
-
lh5_obj,
|
232
|
+
lh5_obj.id,
|
233
|
+
lh5_obj.file.filename,
|
234
|
+
lh5_obj.name,
|
153
235
|
start_row=start_row,
|
154
236
|
n_rows=n_rows,
|
155
237
|
idx=idx,
|
@@ -170,6 +252,7 @@ class LH5Store:
|
|
170
252
|
n_rows: int | None = None,
|
171
253
|
wo_mode: str = "append",
|
172
254
|
write_start: int = 0,
|
255
|
+
page_buffer: int = 0,
|
173
256
|
**h5py_kwargs,
|
174
257
|
) -> None:
|
175
258
|
"""Write an LGDO into an LH5 file.
|
@@ -199,10 +282,17 @@ class LH5Store:
|
|
199
282
|
# write_object:overwrite.
|
200
283
|
mode = "w" if wo_mode == "of" else "a"
|
201
284
|
|
285
|
+
file_kwargs = {
|
286
|
+
k: h5py_kwargs[k]
|
287
|
+
for k in h5py_kwargs & signature(h5py.File).parameters.keys()
|
288
|
+
}
|
289
|
+
|
202
290
|
return _serializers._h5_write_lgdo(
|
203
291
|
obj,
|
204
292
|
name,
|
205
|
-
self.gimme_file(
|
293
|
+
self.gimme_file(
|
294
|
+
lh5_file, mode=mode, page_buffer=page_buffer, **file_kwargs
|
295
|
+
),
|
206
296
|
group=group,
|
207
297
|
start_row=start_row,
|
208
298
|
n_rows=n_rows,
|
lgdo/lh5/tools.py
CHANGED
lgdo/lh5/utils.py
CHANGED
@@ -125,7 +125,12 @@ def get_h5_group(
|
|
125
125
|
else:
|
126
126
|
group = base_group.create_group(group)
|
127
127
|
if grp_attrs is not None:
|
128
|
-
group.attrs.update(
|
128
|
+
group.attrs.update(
|
129
|
+
{
|
130
|
+
k: v.encode("utf-8") if isinstance(v, str) else v
|
131
|
+
for k, v in grp_attrs.items()
|
132
|
+
}
|
133
|
+
)
|
129
134
|
return group
|
130
135
|
if (
|
131
136
|
grp_attrs is not None
|
@@ -141,7 +146,13 @@ def get_h5_group(
|
|
141
146
|
log.debug(f"overwriting {group}.attrs...")
|
142
147
|
for key in group.attrs:
|
143
148
|
group.attrs.pop(key)
|
144
|
-
|
149
|
+
|
150
|
+
group.attrs.update(
|
151
|
+
{
|
152
|
+
k: v.encode("utf-8") if isinstance(v, str) else v
|
153
|
+
for k, v in grp_attrs.items()
|
154
|
+
}
|
155
|
+
)
|
145
156
|
|
146
157
|
return group
|
147
158
|
|
lgdo/types/histogram.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
|
+
import logging
|
3
4
|
from collections.abc import Iterable
|
4
5
|
from typing import Any
|
5
6
|
|
@@ -12,6 +13,8 @@ from .lgdo import LGDO
|
|
12
13
|
from .scalar import Scalar
|
13
14
|
from .struct import Struct
|
14
15
|
|
16
|
+
log = logging.getLogger(__name__)
|
17
|
+
|
15
18
|
|
16
19
|
class Histogram(Struct):
|
17
20
|
class Axis(Struct):
|
@@ -197,6 +200,7 @@ class Histogram(Struct):
|
|
197
200
|
isdensity: bool = False,
|
198
201
|
attrs: dict[str, Any] | None = None,
|
199
202
|
binedge_attrs: dict[str, Any] | None = None,
|
203
|
+
flow: bool = True,
|
200
204
|
) -> None:
|
201
205
|
"""A special struct to contain histogrammed data.
|
202
206
|
|
@@ -221,6 +225,16 @@ class Histogram(Struct):
|
|
221
225
|
as binning.
|
222
226
|
attrs
|
223
227
|
a set of user attributes to be carried along with this LGDO.
|
228
|
+
flow
|
229
|
+
If ``False``, silently discard counts in over-/underflow bins of the passed
|
230
|
+
:class:`hist.Hist` instance. If ``True``, this data will also be discarded,
|
231
|
+
but a warning is emitted.
|
232
|
+
|
233
|
+
.. note ::
|
234
|
+
|
235
|
+
:class:`Histogram` does not support storing counts in overflow or
|
236
|
+
underflow bins. This parameter just controls whether a warning will
|
237
|
+
be emitted.
|
224
238
|
"""
|
225
239
|
if isinstance(weights, hist.Hist):
|
226
240
|
if binning is not None:
|
@@ -230,9 +244,10 @@ class Histogram(Struct):
|
|
230
244
|
msg = "not allowed to pass isdensity=True if constructing from hist.Hist instance"
|
231
245
|
raise ValueError(msg)
|
232
246
|
|
233
|
-
if weights.sum(flow=True) != weights.sum(flow=False):
|
234
|
-
|
235
|
-
|
247
|
+
if weights.sum(flow=True) != weights.sum(flow=False) and flow:
|
248
|
+
log.warning(
|
249
|
+
"flow bins of hist.Hist cannot be represented, their counts are discarded"
|
250
|
+
)
|
236
251
|
weights_view = weights.view(flow=False)
|
237
252
|
if type(weights_view) is not np.ndarray:
|
238
253
|
msg = "only simple numpy-backed storages can be used in a hist.Hist"
|
File without changes
|
File without changes
|
File without changes
|