legend-pydataobj 1.9.0__py3-none-any.whl → 1.10.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.1.dist-info}/METADATA +2 -2
- legend_pydataobj-1.10.1.dist-info/RECORD +55 -0
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.1.dist-info}/WHEEL +1 -1
- lgdo/_version.py +2 -2
- lgdo/compression/radware.py +8 -16
- lgdo/lh5/_serializers/read/array.py +9 -9
- lgdo/lh5/_serializers/read/composite.py +67 -78
- lgdo/lh5/_serializers/read/encoded.py +31 -9
- lgdo/lh5/_serializers/read/ndarray.py +55 -42
- lgdo/lh5/_serializers/read/scalar.py +10 -3
- lgdo/lh5/_serializers/read/utils.py +165 -3
- lgdo/lh5/_serializers/read/vector_of_vectors.py +36 -14
- lgdo/lh5/_serializers/write/array.py +6 -1
- lgdo/lh5/_serializers/write/composite.py +14 -5
- lgdo/lh5/_serializers/write/scalar.py +6 -1
- lgdo/lh5/core.py +81 -7
- lgdo/lh5/exceptions.py +3 -3
- lgdo/lh5/iterator.py +258 -74
- lgdo/lh5/store.py +116 -12
- lgdo/lh5/tools.py +1 -1
- lgdo/lh5/utils.py +29 -44
- lgdo/types/histogram.py +122 -6
- lgdo/types/table.py +2 -2
- lgdo/types/vectorofvectors.py +1 -1
- legend_pydataobj-1.9.0.dist-info/RECORD +0 -55
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.1.dist-info}/LICENSE +0 -0
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.1.dist-info}/entry_points.txt +0 -0
- {legend_pydataobj-1.9.0.dist-info → legend_pydataobj-1.10.1.dist-info}/top_level.txt +0 -0
lgdo/lh5/iterator.py
CHANGED
@@ -2,11 +2,14 @@ from __future__ import annotations
|
|
2
2
|
|
3
3
|
import logging
|
4
4
|
import typing
|
5
|
+
from warnings import warn
|
5
6
|
|
6
7
|
import numpy as np
|
7
8
|
import pandas as pd
|
9
|
+
from numpy.typing import NDArray
|
8
10
|
|
9
11
|
from ..types import Array, Scalar, Struct, VectorOfVectors
|
12
|
+
from ..units import default_units_registry as ureg
|
10
13
|
from .store import LH5Store
|
11
14
|
from .utils import expand_path
|
12
15
|
|
@@ -19,35 +22,53 @@ class LH5Iterator(typing.Iterator):
|
|
19
22
|
at a time. This also accepts an entry list/mask to enable event selection,
|
20
23
|
and a field mask.
|
21
24
|
|
22
|
-
This
|
25
|
+
This can be used as an iterator:
|
23
26
|
|
24
|
-
>>> lh5_obj, n_rows
|
25
|
-
|
26
|
-
to read the block of entries starting at entry. In case of multiple files
|
27
|
-
or the use of an event selection, entry refers to a global event index
|
28
|
-
across files and does not count events that are excluded by the selection.
|
29
|
-
|
30
|
-
This can also be used as an iterator:
|
31
|
-
|
32
|
-
>>> for lh5_obj, entry, n_rows in LH5Iterator(...):
|
27
|
+
>>> for lh5_obj, i_entry, n_rows in LH5Iterator(...):
|
33
28
|
>>> # do the thing!
|
34
29
|
|
35
|
-
This is intended for if you are reading a large quantity of data
|
36
|
-
|
30
|
+
This is intended for reading a large quantity of data. This
|
31
|
+
will ensure that you traverse files efficiently to minimize caching time
|
32
|
+
and will limit your memory usage (particularly when reading in waveforms!).
|
37
33
|
The ``lh5_obj`` that is read by this class is reused in order to avoid
|
38
34
|
reallocation of memory; this means that if you want to hold on to data
|
39
35
|
between reads, you will have to copy it somewhere!
|
36
|
+
|
37
|
+
When defining an LH5Iterator, you must give it a list of files and the
|
38
|
+
hdf5 groups containing the data tables you are reading. You may also
|
39
|
+
provide a field mask, and an entry list or mask, specifying which entries
|
40
|
+
to read from the files. You may also pair it with a friend iterator, which
|
41
|
+
contains a parallel group of files which will be simultaneously read.
|
42
|
+
In addition to accessing requested data via ``lh5_obj``, several
|
43
|
+
properties exist to tell you where that data came from:
|
44
|
+
|
45
|
+
- lh5_it.current_local_entries: get the entry numbers relative to the
|
46
|
+
file the data came from
|
47
|
+
- lh5_it.current_global_entries: get the entry numbers relative to the
|
48
|
+
full dataset
|
49
|
+
- lh5_it.current_files: get the file name corresponding to each entry
|
50
|
+
- lh5_it.current_groups: get the group name corresponding to each entry
|
51
|
+
|
52
|
+
This class can also be used for random access:
|
53
|
+
|
54
|
+
>>> lh5_obj, n_rows = lh5_it.read(i_entry)
|
55
|
+
|
56
|
+
to read the block of entries starting at i_entry. In case of multiple files
|
57
|
+
or the use of an event selection, i_entry refers to a global event index
|
58
|
+
across files and does not count events that are excluded by the selection.
|
40
59
|
"""
|
41
60
|
|
42
61
|
def __init__(
|
43
62
|
self,
|
44
63
|
lh5_files: str | list[str],
|
45
|
-
groups: str | list[str],
|
64
|
+
groups: str | list[str] | list[list[str]],
|
46
65
|
base_path: str = "",
|
47
66
|
entry_list: list[int] | list[list[int]] | None = None,
|
48
67
|
entry_mask: list[bool] | list[list[bool]] | None = None,
|
49
68
|
field_mask: dict[str, bool] | list[str] | tuple[str] | None = None,
|
50
|
-
buffer_len: int =
|
69
|
+
buffer_len: int = "100*MB",
|
70
|
+
file_cache: int = 10,
|
71
|
+
file_map: NDArray[int] = None,
|
51
72
|
friend: typing.Iterator | None = None,
|
52
73
|
) -> None:
|
53
74
|
"""
|
@@ -57,9 +78,10 @@ class LH5Iterator(typing.Iterator):
|
|
57
78
|
file or files to read from. May include wildcards and environment
|
58
79
|
variables.
|
59
80
|
groups
|
60
|
-
HDF5 group(s) to read. If a list is provided
|
61
|
-
|
62
|
-
|
81
|
+
HDF5 group(s) to read. If a list of strings is provided, use
|
82
|
+
same groups for each file. If a list of lists is provided, size
|
83
|
+
of outer list must match size of file list, and each inner list
|
84
|
+
will apply to a single file (or set of wildcarded files)
|
63
85
|
entry_list
|
64
86
|
list of entry numbers to read. If a nested list is provided,
|
65
87
|
expect one top-level list for each file, containing a list of
|
@@ -72,66 +94,98 @@ class LH5Iterator(typing.Iterator):
|
|
72
94
|
more details.
|
73
95
|
buffer_len
|
74
96
|
number of entries to read at a time while iterating through files.
|
97
|
+
file_cache
|
98
|
+
maximum number of files to keep open at a time
|
99
|
+
file_map
|
100
|
+
cumulative file/group entries. This can be provided on construction
|
101
|
+
to speed up random or sparse access; otherwise, we sequentially
|
102
|
+
read the size of each group. WARNING: no checks for accuracy are
|
103
|
+
performed so only use this if you know what you are doing!
|
75
104
|
friend
|
76
105
|
a \"friend\" LH5Iterator that will be read in parallel with this.
|
77
106
|
The friend should have the same length and entry list. A single
|
78
107
|
LH5 table containing columns from both iterators will be returned.
|
108
|
+
Note that buffer_len will be set to the minimum of the two.
|
79
109
|
"""
|
80
|
-
self.lh5_st = LH5Store(base_path=base_path, keep_open=
|
110
|
+
self.lh5_st = LH5Store(base_path=base_path, keep_open=file_cache)
|
81
111
|
|
82
112
|
# List of files, with wildcards and env vars expanded
|
83
113
|
if isinstance(lh5_files, str):
|
84
114
|
lh5_files = [lh5_files]
|
85
|
-
|
86
|
-
lh5_files *= len(groups)
|
87
|
-
elif not isinstance(lh5_files, list):
|
115
|
+
elif not isinstance(lh5_files, (list, set, tuple)):
|
88
116
|
msg = "lh5_files must be a string or list of strings"
|
89
117
|
raise ValueError(msg)
|
90
118
|
|
91
119
|
if isinstance(groups, str):
|
92
|
-
groups = [groups] * len(lh5_files)
|
120
|
+
groups = [[groups]] * len(lh5_files)
|
93
121
|
elif not isinstance(groups, list):
|
94
|
-
msg = "group must be a string or list
|
122
|
+
msg = "group must be a string or appropriate list"
|
123
|
+
raise ValueError(msg)
|
124
|
+
elif all(isinstance(g, str) for g in groups):
|
125
|
+
groups = [groups] * len(lh5_files)
|
126
|
+
elif len(groups) == len(lh5_files) and all(
|
127
|
+
isinstance(gr_list, (list, set, tuple)) for gr_list in groups
|
128
|
+
):
|
129
|
+
pass
|
130
|
+
else:
|
131
|
+
msg = "group must be a string or appropriate list"
|
95
132
|
raise ValueError(msg)
|
96
133
|
|
97
134
|
if len(groups) != len(lh5_files):
|
98
135
|
msg = "lh5_files and groups must have same length"
|
99
136
|
raise ValueError(msg)
|
100
137
|
|
138
|
+
# make flattened outer-product-like list of files and groups
|
101
139
|
self.lh5_files = []
|
102
140
|
self.groups = []
|
103
141
|
for f, g in zip(lh5_files, groups):
|
104
|
-
f_exp
|
105
|
-
|
106
|
-
|
142
|
+
for f_exp in expand_path(f, list=True, base_path=base_path):
|
143
|
+
self.lh5_files += [f_exp] * len(g)
|
144
|
+
self.groups += list(g)
|
107
145
|
|
108
146
|
if entry_list is not None and entry_mask is not None:
|
109
147
|
msg = "entry_list and entry_mask arguments are mutually exclusive"
|
110
148
|
raise ValueError(msg)
|
111
149
|
|
112
150
|
# Map to last row in each file
|
113
|
-
|
151
|
+
if file_map is None:
|
152
|
+
self.file_map = np.full(len(self.lh5_files), np.iinfo("q").max, "q")
|
153
|
+
else:
|
154
|
+
self.file_map = np.array(file_map)
|
155
|
+
|
114
156
|
# Map to last iterator entry for each file
|
115
|
-
self.entry_map = np.full(len(self.lh5_files), np.iinfo("
|
157
|
+
self.entry_map = np.full(len(self.lh5_files), np.iinfo("q").max, "q")
|
116
158
|
self.buffer_len = buffer_len
|
117
159
|
|
118
160
|
if len(self.lh5_files) > 0:
|
119
161
|
f = self.lh5_files[0]
|
120
162
|
g = self.groups[0]
|
163
|
+
n_rows = self.lh5_st.read_n_rows(g, f)
|
164
|
+
|
165
|
+
if isinstance(self.buffer_len, str):
|
166
|
+
self.buffer_len = ureg.Quantity(buffer_len)
|
167
|
+
if isinstance(self.buffer_len, ureg.Quantity):
|
168
|
+
self.buffer_len = int(
|
169
|
+
self.buffer_len
|
170
|
+
/ (self.lh5_st.read_size_in_bytes(g, f) * ureg.B)
|
171
|
+
* n_rows
|
172
|
+
)
|
173
|
+
|
121
174
|
self.lh5_buffer = self.lh5_st.get_buffer(
|
122
175
|
g,
|
123
176
|
f,
|
124
177
|
size=self.buffer_len,
|
125
178
|
field_mask=field_mask,
|
126
179
|
)
|
127
|
-
|
180
|
+
if file_map is None:
|
181
|
+
self.file_map[0] = n_rows
|
128
182
|
else:
|
129
183
|
msg = f"can't open any files from {lh5_files}"
|
130
184
|
raise RuntimeError(msg)
|
131
185
|
|
132
186
|
self.n_rows = 0
|
133
|
-
self.
|
134
|
-
self.
|
187
|
+
self.current_i_entry = 0
|
188
|
+
self.next_i_entry = 0
|
135
189
|
|
136
190
|
self.field_mask = field_mask
|
137
191
|
|
@@ -142,13 +196,13 @@ class LH5Iterator(typing.Iterator):
|
|
142
196
|
entry_list = list(entry_list)
|
143
197
|
if isinstance(entry_list[0], int):
|
144
198
|
self.local_entry_list = [None] * len(self.file_map)
|
145
|
-
self.global_entry_list = np.array(entry_list, "
|
199
|
+
self.global_entry_list = np.array(entry_list, "q")
|
146
200
|
self.global_entry_list.sort()
|
147
201
|
|
148
202
|
else:
|
149
203
|
self.local_entry_list = [[]] * len(self.file_map)
|
150
204
|
for i_file, local_list in enumerate(entry_list):
|
151
|
-
self.local_entry_list[i_file] = np.array(local_list, "
|
205
|
+
self.local_entry_list[i_file] = np.array(local_list, "q")
|
152
206
|
self.local_entry_list[i_file].sort()
|
153
207
|
|
154
208
|
elif entry_mask is not None:
|
@@ -168,6 +222,15 @@ class LH5Iterator(typing.Iterator):
|
|
168
222
|
if not isinstance(friend, typing.Iterator):
|
169
223
|
msg = "Friend must be an Iterator"
|
170
224
|
raise ValueError(msg)
|
225
|
+
|
226
|
+
# set buffer_lens to be equal
|
227
|
+
if self.buffer_len < friend.buffer_len:
|
228
|
+
friend.buffer_len = self.buffer_len
|
229
|
+
friend.lh5_buffer.resize(self.buffer_len)
|
230
|
+
elif self.buffer_len > friend.buffer_len:
|
231
|
+
self.buffer_len = friend.buffer_len
|
232
|
+
self.lh5_buffer.resize(friend.buffer_len)
|
233
|
+
|
171
234
|
self.lh5_buffer.join(friend.lh5_buffer)
|
172
235
|
self.friend = friend
|
173
236
|
|
@@ -176,33 +239,52 @@ class LH5Iterator(typing.Iterator):
|
|
176
239
|
if i_file < 0:
|
177
240
|
return 0
|
178
241
|
fcl = self.file_map[i_file]
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
)
|
183
|
-
self.file_map[
|
242
|
+
|
243
|
+
# if we haven't already calculated, calculate for all files up to i_file
|
244
|
+
if fcl == np.iinfo("q").max:
|
245
|
+
i_start = np.searchsorted(self.file_map, np.iinfo("q").max)
|
246
|
+
fcl = self.file_map[i_start - 1] if i_start > 0 else 0
|
247
|
+
|
248
|
+
for i in range(i_start, i_file + 1):
|
249
|
+
fcl += self.lh5_st.read_n_rows(self.groups[i], self.lh5_files[i])
|
250
|
+
self.file_map[i] = fcl
|
184
251
|
return fcl
|
185
252
|
|
253
|
+
@property
|
254
|
+
def current_entry(self) -> int:
|
255
|
+
"deprecated alias for current_i_entry"
|
256
|
+
warn(
|
257
|
+
"current_entry has been renamed to current_i_entry.",
|
258
|
+
DeprecationWarning,
|
259
|
+
stacklevel=2,
|
260
|
+
)
|
261
|
+
|
262
|
+
return self.current_i_entry
|
263
|
+
|
186
264
|
def _get_file_cumentries(self, i_file: int) -> int:
|
187
265
|
"""Helper to get cumulative iterator entries in file"""
|
188
266
|
if i_file < 0:
|
189
267
|
return 0
|
190
268
|
n = self.entry_map[i_file]
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
269
|
+
|
270
|
+
# if we haven't already calculated, calculate for all files up to i_file
|
271
|
+
if n == np.iinfo("q").max:
|
272
|
+
i_start = np.searchsorted(self.entry_map, np.iinfo("q").max)
|
273
|
+
n = self.entry_map[i_start - 1] if i_start > 0 else 0
|
274
|
+
|
275
|
+
for i in range(i_start, i_file + 1):
|
276
|
+
elist = self.get_file_entrylist(i)
|
277
|
+
fcl = self._get_file_cumlen(i)
|
278
|
+
if elist is None:
|
279
|
+
# no entry list provided
|
280
|
+
n = fcl
|
281
|
+
else:
|
282
|
+
n += len(elist)
|
283
|
+
# check that file entries fall inside of file
|
284
|
+
if len(elist) > 0 and elist[-1] >= fcl:
|
285
|
+
logging.warning(f"Found entries out of range for file {i}")
|
286
|
+
n += np.searchsorted(elist, fcl, "right") - len(elist)
|
287
|
+
self.entry_map[i] = n
|
206
288
|
return n
|
207
289
|
|
208
290
|
def get_file_entrylist(self, i_file: int) -> np.ndarray:
|
@@ -218,50 +300,50 @@ class LH5Iterator(typing.Iterator):
|
|
218
300
|
f_end = self._get_file_cumlen(i_file)
|
219
301
|
i_start = self._get_file_cumentries(i_file - 1)
|
220
302
|
i_stop = np.searchsorted(self.global_entry_list, f_end, "right")
|
221
|
-
elist = np.array(self.global_entry_list[i_start:i_stop], "
|
303
|
+
elist = np.array(self.global_entry_list[i_start:i_stop], "q") - f_start
|
222
304
|
self.local_entry_list[i_file] = elist
|
223
305
|
return elist
|
224
306
|
|
225
307
|
def get_global_entrylist(self) -> np.ndarray:
|
226
308
|
"""Get global entry list, constructing it if needed"""
|
227
309
|
if self.global_entry_list is None and self.local_entry_list is not None:
|
228
|
-
self.global_entry_list = np.zeros(len(self), "
|
310
|
+
self.global_entry_list = np.zeros(len(self), "q")
|
229
311
|
for i_file in range(len(self.lh5_files)):
|
230
|
-
i_start = self.
|
231
|
-
i_stop = self.
|
232
|
-
f_start = self.
|
312
|
+
i_start = self._get_file_cumentries(i_file - 1)
|
313
|
+
i_stop = self._get_file_cumentries(i_file)
|
314
|
+
f_start = self._get_file_cumlen(i_file - 1)
|
233
315
|
self.global_entry_list[i_start:i_stop] = (
|
234
316
|
self.get_file_entrylist(i_file) + f_start
|
235
317
|
)
|
236
318
|
return self.global_entry_list
|
237
319
|
|
238
|
-
def read(self,
|
239
|
-
"""Read the nextlocal chunk of events, starting at
|
320
|
+
def read(self, i_entry: int) -> tuple[LGDO, int]:
|
321
|
+
"""Read the next local chunk of events, starting at i_entry. Return the
|
240
322
|
LH5 buffer and number of rows read."""
|
241
323
|
self.n_rows = 0
|
242
|
-
i_file = np.searchsorted(self.entry_map,
|
324
|
+
i_file = np.searchsorted(self.entry_map, i_entry, "right")
|
243
325
|
|
244
326
|
# if file hasn't been opened yet, search through files
|
245
327
|
# sequentially until we find the right one
|
246
|
-
if i_file < len(self.lh5_files) and self.entry_map[i_file] == np.iinfo("
|
247
|
-
while i_file < len(self.lh5_files) and
|
328
|
+
if i_file < len(self.lh5_files) and self.entry_map[i_file] == np.iinfo("q").max:
|
329
|
+
while i_file < len(self.lh5_files) and i_entry >= self._get_file_cumentries(
|
248
330
|
i_file
|
249
331
|
):
|
250
332
|
i_file += 1
|
251
333
|
|
252
334
|
if i_file == len(self.lh5_files):
|
253
335
|
return (self.lh5_buffer, self.n_rows)
|
254
|
-
|
336
|
+
local_i_entry = i_entry - self._get_file_cumentries(i_file - 1)
|
255
337
|
|
256
338
|
while self.n_rows < self.buffer_len and i_file < len(self.file_map):
|
257
339
|
# Loop through files
|
258
340
|
local_idx = self.get_file_entrylist(i_file)
|
259
341
|
if local_idx is not None and len(local_idx) == 0:
|
260
342
|
i_file += 1
|
261
|
-
|
343
|
+
local_i_entry = 0
|
262
344
|
continue
|
263
345
|
|
264
|
-
i_local =
|
346
|
+
i_local = local_i_entry if local_idx is None else local_idx[local_i_entry]
|
265
347
|
self.lh5_buffer, n_rows = self.lh5_st.read(
|
266
348
|
self.groups[i_file],
|
267
349
|
self.lh5_files[i_file],
|
@@ -275,12 +357,12 @@ class LH5Iterator(typing.Iterator):
|
|
275
357
|
|
276
358
|
self.n_rows += n_rows
|
277
359
|
i_file += 1
|
278
|
-
|
360
|
+
local_i_entry = 0
|
279
361
|
|
280
|
-
self.
|
362
|
+
self.current_i_entry = i_entry
|
281
363
|
|
282
364
|
if self.friend is not None:
|
283
|
-
self.friend.read(
|
365
|
+
self.friend.read(i_entry)
|
284
366
|
|
285
367
|
return (self.lh5_buffer, self.n_rows)
|
286
368
|
|
@@ -290,6 +372,108 @@ class LH5Iterator(typing.Iterator):
|
|
290
372
|
if self.friend is not None:
|
291
373
|
self.friend.reset_field_mask(mask)
|
292
374
|
|
375
|
+
@property
|
376
|
+
def current_local_entries(self) -> NDArray[int]:
|
377
|
+
"""Return list of local file entries in buffer"""
|
378
|
+
cur_entries = np.zeros(self.n_rows, dtype="int32")
|
379
|
+
i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
|
380
|
+
file_start = self._get_file_cumentries(i_file - 1)
|
381
|
+
i_local = self.current_i_entry - file_start
|
382
|
+
i = 0
|
383
|
+
|
384
|
+
while i < len(cur_entries):
|
385
|
+
# number of entries to read from this file
|
386
|
+
file_end = self._get_file_cumentries(i_file)
|
387
|
+
n = min(file_end - file_start - i_local, len(cur_entries) - i)
|
388
|
+
entries = self.get_file_entrylist(i_file)
|
389
|
+
|
390
|
+
if entries is None:
|
391
|
+
cur_entries[i : i + n] = np.arange(i_local, i_local + n)
|
392
|
+
else:
|
393
|
+
cur_entries[i : i + n] = entries[i_local : i_local + n]
|
394
|
+
|
395
|
+
i_file += 1
|
396
|
+
file_start = file_end
|
397
|
+
i_local = 0
|
398
|
+
i += n
|
399
|
+
|
400
|
+
return cur_entries
|
401
|
+
|
402
|
+
@property
|
403
|
+
def current_global_entries(self) -> NDArray[int]:
|
404
|
+
"""Return list of global entries in buffer"""
|
405
|
+
cur_entries = np.zeros(self.n_rows, dtype="int32")
|
406
|
+
i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
|
407
|
+
file_start = self._get_file_cumentries(i_file - 1)
|
408
|
+
i_local = self.current_i_entry - file_start
|
409
|
+
i = 0
|
410
|
+
|
411
|
+
while i < len(cur_entries):
|
412
|
+
# number of entries to read from this file
|
413
|
+
file_end = self._get_file_cumentries(i_file)
|
414
|
+
n = min(file_end - file_start - i_local, len(cur_entries) - i)
|
415
|
+
entries = self.get_file_entrylist(i_file)
|
416
|
+
|
417
|
+
if entries is None:
|
418
|
+
cur_entries[i : i + n] = self._get_file_cumlen(i_file - 1) + np.arange(
|
419
|
+
i_local, i_local + n
|
420
|
+
)
|
421
|
+
else:
|
422
|
+
cur_entries[i : i + n] = (
|
423
|
+
self._get_file_cumlen(i_file - 1) + entries[i_local : i_local + n]
|
424
|
+
)
|
425
|
+
|
426
|
+
i_file += 1
|
427
|
+
file_start = file_end
|
428
|
+
i_local = 0
|
429
|
+
i += n
|
430
|
+
|
431
|
+
return cur_entries
|
432
|
+
|
433
|
+
@property
|
434
|
+
def current_files(self) -> NDArray[str]:
|
435
|
+
"""Return list of file names for entries in buffer"""
|
436
|
+
cur_files = np.zeros(self.n_rows, dtype=object)
|
437
|
+
i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
|
438
|
+
file_start = self._get_file_cumentries(i_file - 1)
|
439
|
+
i_local = self.current_i_entry - file_start
|
440
|
+
i = 0
|
441
|
+
|
442
|
+
while i < len(cur_files):
|
443
|
+
# number of entries to read from this file
|
444
|
+
file_end = self._get_file_cumentries(i_file)
|
445
|
+
n = min(file_end - file_start - i_local, len(cur_files) - i)
|
446
|
+
cur_files[i : i + n] = self.lh5_files[i_file]
|
447
|
+
|
448
|
+
i_file += 1
|
449
|
+
file_start = file_end
|
450
|
+
i_local = 0
|
451
|
+
i += n
|
452
|
+
|
453
|
+
return cur_files
|
454
|
+
|
455
|
+
@property
|
456
|
+
def current_groups(self) -> NDArray[str]:
|
457
|
+
"""Return list of group names for entries in buffer"""
|
458
|
+
cur_groups = np.zeros(self.n_rows, dtype=object)
|
459
|
+
i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
|
460
|
+
file_start = self._get_file_cumentries(i_file - 1)
|
461
|
+
i_local = self.current_i_entry - file_start
|
462
|
+
i = 0
|
463
|
+
|
464
|
+
while i < len(cur_groups):
|
465
|
+
# number of entries to read from this file
|
466
|
+
file_end = self._get_file_cumentries(i_file)
|
467
|
+
n = min(file_end - file_start - i_local, len(cur_groups) - i)
|
468
|
+
cur_groups[i : i + n] = self.groups[i_file]
|
469
|
+
|
470
|
+
i_file += 1
|
471
|
+
file_start = file_end
|
472
|
+
i_local = 0
|
473
|
+
i += n
|
474
|
+
|
475
|
+
return cur_groups
|
476
|
+
|
293
477
|
def __len__(self) -> int:
|
294
478
|
"""Return the total number of entries."""
|
295
479
|
return (
|
@@ -300,15 +484,15 @@ class LH5Iterator(typing.Iterator):
|
|
300
484
|
|
301
485
|
def __iter__(self) -> typing.Iterator:
|
302
486
|
"""Loop through entries in blocks of size buffer_len."""
|
303
|
-
self.
|
304
|
-
self.
|
487
|
+
self.current_i_entry = 0
|
488
|
+
self.next_i_entry = 0
|
305
489
|
return self
|
306
490
|
|
307
491
|
def __next__(self) -> tuple[LGDO, int, int]:
|
308
492
|
"""Read next buffer_len entries and return lh5_table, iterator entry
|
309
493
|
and n_rows read."""
|
310
|
-
buf, n_rows = self.read(self.
|
311
|
-
self.
|
494
|
+
buf, n_rows = self.read(self.next_i_entry)
|
495
|
+
self.next_i_entry = self.current_i_entry + n_rows
|
312
496
|
if n_rows == 0:
|
313
497
|
raise StopIteration
|
314
|
-
return (buf, self.
|
498
|
+
return (buf, self.current_i_entry, n_rows)
|