legend-pydataobj 1.11.6__py3-none-any.whl → 1.11.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lgdo/lh5/concat.py ADDED
@@ -0,0 +1,219 @@
+ from __future__ import annotations
+
+ import fnmatch
+ import logging
+
+ from lgdo.lh5 import LH5Iterator
+
+ from .. import Array, Scalar, Struct, Table, VectorOfVectors, lh5
+
+ log = logging.getLogger(__name__)
+
+
+ def _get_obj_list(
+     lh5_files: list, include_list: list | None = None, exclude_list: list | None = None
+ ) -> list[str]:
+     """Extract a list of lh5 objects to concatenate.
+
+     Parameters
+     ----------
+     lh5_files
+         list of input files to concatenate.
+     include_list
+         patterns for tables to include.
+     exclude_list
+         patterns for tables to exclude.
+
+     """
+     file0 = lh5_files[0]
+     obj_list_full = set(lh5.ls(file0, recursive=True))
+
+     # let's remove objects with nested LGDOs inside
+     to_remove = set()
+     for name in obj_list_full:
+         if len(fnmatch.filter(obj_list_full, f"{name}/*")) > 1:
+             to_remove.add(name)
+     obj_list_full -= to_remove
+
+     obj_list = set()
+     # now first remove excluded stuff
+     if exclude_list is not None:
+         for exc in exclude_list:
+             obj_list_full -= set(fnmatch.filter(obj_list_full, exc.strip("/")))
+
+     # then make list of included, based on latest list
+     if include_list is not None:
+         for inc in include_list:
+             obj_list |= set(fnmatch.filter(obj_list_full, inc.strip("/")))
+     else:
+         obj_list = obj_list_full
+
+     # sort
+     return sorted(obj_list)
+
+
+ def _get_lgdos(file, obj_list):
+     """Get name of LGDO objects."""
+
+     store = lh5.LH5Store()
+     h5f0 = store.gimme_file(file)
+
+     lgdos = []
+     lgdo_structs = {}
+
+     # loop over object list in the first file
+     for name in obj_list:
+         # now loop over groups starting from root
+         current = ""
+         for item in name.split("/"):
+             current = f"{current}/{item}".strip("/")
+
+             if current in lgdos:
+                 break
+
+             # not even an LGDO (i.e. a plain HDF5 group)!
+             if "datatype" not in h5f0[current].attrs:
+                 continue
+
+             # read as little as possible
+             obj = store.read(current, h5f0, n_rows=1)
+             if isinstance(obj, (Table, Array, VectorOfVectors)):
+                 lgdos.append(current)
+
+             elif isinstance(obj, Struct):
+                 # structs might be used in a "group-like" fashion (i.e. they might only
+                 # contain array-like objects).
+                 # note: handle after handling tables, as tables also satisfy this check.
+                 lgdo_structs[current] = obj.attrs["datatype"]
+                 continue
+
+             elif isinstance(obj, Scalar):
+                 msg = f"cannot concat scalar field {current}"
+                 log.warning(msg)
+
+             break
+
+     msg = f"first-level, array-like objects: {lgdos}"
+     log.info(msg)
+
+     msg = f"nested structs: {lgdo_structs}"
+     log.info(msg)
+
+     h5f0.close()
+
+     if lgdos == []:
+         msg = "did not find any field to concatenate, exit"
+         raise RuntimeError(msg)
+
+     return lgdos, lgdo_structs
+
+
+ def _inplace_table_filter(name, table, obj_list):
+     """Filter objects nested in this LGDO."""
+     skm = fnmatch.filter(obj_list, f"{name}/*")
+     kept = {it.removeprefix(name).strip("/").split("/")[0] for it in skm}
+
+     # now remove fields
+     for k in list(table.keys()):
+         if k not in kept:
+             table.remove_column(k)
+
+     msg = f"fields left in table '{name}': {table.keys()}"
+     log.debug(msg)
+
+     # recurse!
+     for k2, v2 in table.items():
+         if not isinstance(v2, Table):
+             continue
+
+         _inplace_table_filter(f"{name}/{k2}", v2, obj_list)
+
+
+ def _remove_nested_fields(lgdos: dict, obj_list: list):
+     """Remove (nested) table fields based on obj_list."""
+
+     for key, val in lgdos.items():
+         if not isinstance(val, Table):
+             continue
+
+         _inplace_table_filter(key, val, obj_list)
+
+
+ def lh5concat(
+     lh5_files: list,
+     output: str,
+     overwrite: bool = False,
+     *,
+     include_list: list | None = None,
+     exclude_list: list | None = None,
+ ) -> None:
+     """Concatenate LGDO Arrays, VectorOfVectors and Tables in LH5 files.
+
+     Parameters
+     ----------
+     lh5_files
+         list of input files to concatenate.
+     output
+         path to the output file.
+     include_list
+         patterns for tables to include.
+     exclude_list
+         patterns for tables to exclude.
+     """
+
+     if len(lh5_files) < 2:
+         msg = "you must provide at least two input files"
+         raise RuntimeError(msg)
+
+     # determine list of objects by recursively ls'ing first file
+     obj_list = _get_obj_list(
+         lh5_files, include_list=include_list, exclude_list=exclude_list
+     )
+
+     msg = f"objects matching include patterns {include_list} in {lh5_files[0]}: {obj_list}"
+     log.info(msg)
+
+     lgdos, lgdo_structs = _get_lgdos(lh5_files[0], obj_list)
+     first_done = False
+     store = lh5.LH5Store()
+
+     # loop over lgdo objects
+     for lgdo in lgdos:
+         # iterate over the files
+         for lh5_obj in LH5Iterator(lh5_files, lgdo):
+             data = {lgdo: lh5_obj}
+
+             # remove the nested fields
+             _remove_nested_fields(data, obj_list)
+
+             if first_done is False:
+                 msg = f"creating output file {output}"
+                 log.info(msg)
+
+                 store.write(
+                     data[lgdo],
+                     lgdo,
+                     output,
+                     wo_mode="overwrite_file"
+                     if (overwrite and not first_done)
+                     else "write_safe",
+                 )
+                 first_done = True
+
+             else:
+                 msg = f"appending to {output}"
+                 log.info(msg)
+
+                 if isinstance(data[lgdo], Table):
+                     _inplace_table_filter(lgdo, data[lgdo], obj_list)
+
+                 store.write(data[lgdo], lgdo, output, wo_mode="append")
+
+     if lgdo_structs != {}:
+         output_file = store.gimme_file(output, mode="a")
+         for struct, struct_dtype in lgdo_structs.items():
+             msg = f"reset datatype of struct {struct} to {struct_dtype}"
+             log.debug(msg)
+
+             output_file[struct].attrs["datatype"] = struct_dtype
+         output_file.close()
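
A minimal usage sketch of the new `lh5concat` entry point (the file and group names here are hypothetical):

    from lgdo.lh5.concat import lh5concat

    # concatenate the "geds/raw" table from two files into one output file,
    # keeping everything except waveform-related columns
    lh5concat(
        ["run0.lh5", "run1.lh5"],
        output="concat.lh5",
        overwrite=True,
        include_list=["geds/raw/*"],
        exclude_list=["geds/raw/waveform*"],
    )
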
lgdo/lh5/core.py CHANGED
@@ -4,6 +4,7 @@ import bisect
  import inspect
  import sys
  from collections.abc import Mapping, Sequence
+ from contextlib import suppress
  from typing import Any

  import h5py
@@ -92,8 +93,7 @@ def read(
          will be set to ``True``, while the rest will default to ``False``.
      obj_buf
          Read directly into memory provided in `obj_buf`. Note: the buffer
-         will be expanded to accommodate the data requested. To maintain the
-         buffer length, send in ``n_rows = len(obj_buf)``.
+         will be resized to accommodate the data retrieved.
      obj_buf_start
          Start location in ``obj_buf`` for read. For concatenating data to
          array-like objects.
@@ -106,25 +106,25 @@

      Returns
      -------
-     (object, n_rows_read)
-         `object` is the read-out object `n_rows_read` is the number of rows
-         successfully read out. Essential for arrays when the amount of data
-         is smaller than the object buffer. For scalars and structs
-         `n_rows_read` will be ``1``. For tables it is redundant with
-         ``table.loc``. If `obj_buf` is ``None``, only `object` is returned.
+     object
+         the read-out object
      """
      if isinstance(lh5_file, h5py.File):
          lh5_obj = lh5_file[name]
      elif isinstance(lh5_file, str):
          lh5_file = h5py.File(lh5_file, mode="r", locking=locking)
-         lh5_obj = lh5_file[name]
+         try:
+             lh5_obj = lh5_file[name]
+         except KeyError as ke:
+             err = f"Object {name} not found in file {lh5_file.filename}"
+             raise KeyError(err) from ke
      else:
-         lh5_files = list(lh5_file)
-
-         n_rows_read = 0
-         obj_buf_is_new = False
+         if obj_buf is not None:
+             obj_buf.resize(obj_buf_start)
+         else:
+             obj_buf_start = 0

-         for i, h5f in enumerate(lh5_files):
+         for i, h5f in enumerate(lh5_file):
              if (
                  isinstance(idx, (list, tuple))
                  and len(idx) > 0
@@ -146,33 +146,26 @@
                  idx = np.array(idx[0])[n_rows_to_read_i:] - n_rows_i
              else:
                  idx_i = None
-                 n_rows_i = n_rows - n_rows_read

-             obj_ret = read(
+             obj_buf_start_i = len(obj_buf) if obj_buf else 0
+             n_rows_i = n_rows - (obj_buf_start_i - obj_buf_start)
+
+             obj_buf = read(
                  name,
                  h5f,
-                 start_row,
+                 start_row if i == 0 else 0,
                  n_rows_i,
                  idx_i,
                  use_h5idx,
                  field_mask,
                  obj_buf,
-                 obj_buf_start,
+                 obj_buf_start_i,
                  decompress,
              )
-             if isinstance(obj_ret, tuple):
-                 obj_buf, n_rows_read_i = obj_ret
-                 obj_buf_is_new = True
-             else:
-                 obj_buf = obj_ret
-                 n_rows_read_i = len(obj_buf)

-             n_rows_read += n_rows_read_i
-             if n_rows_read >= n_rows or obj_buf is None:
-                 return obj_buf, n_rows_read
-             start_row = 0
-             obj_buf_start += n_rows_read_i
-         return obj_buf if obj_buf_is_new else (obj_buf, n_rows_read)
+             if obj_buf is None or (len(obj_buf) - obj_buf_start) >= n_rows:
+                 return obj_buf
+         return obj_buf

      if isinstance(idx, (list, tuple)) and len(idx) > 0 and not np.isscalar(idx[0]):
          idx = idx[0]
@@ -192,8 +185,10 @@
          obj_buf_start=obj_buf_start,
          decompress=decompress,
      )
+     with suppress(AttributeError):
+         obj.resize(obj_buf_start + n_rows_read)

-     return obj if obj_buf is None else (obj, n_rows_read)
+     return obj


  def write(
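
The hunks above change the return convention of `read`: it now always returns the LGDO alone, resized to the data actually read, instead of an `(object, n_rows_read)` tuple when `obj_buf` is passed. A migration sketch (object and file names hypothetical):

    from lgdo import lh5

    # 1.11.6: obj, n_rows_read = lh5.read("geds/raw", "file.lh5", obj_buf=buf)
    # 1.11.8: a single return value; the row count is just the object's length
    obj = lh5.read("geds/raw", "file.lh5", n_rows=1000)
    n_rows_read = len(obj)
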
@@ -273,11 +268,13 @@
            end of array is the same as ``append``.
          - ``overwrite_file`` or ``of``: delete file if present prior to
            writing to it. `write_start` should be 0 (it's ignored).
-         - ``append_column`` or ``ac``: append columns from an
-           :class:`~.lgdo.table.Table` `obj` only if there is an existing
-           :class:`~.lgdo.table.Table` in the `lh5_file` with the same
-           `name` and :class:`~.lgdo.table.Table.size`. If the sizes don't
-           match, or if there are matching fields, it errors out.
+         - ``append_column`` or ``ac``: append fields/columns from an
+           :class:`~.lgdo.struct.Struct` `obj` (and derived types such as
+           :class:`~.lgdo.table.Table`) only if there is an existing
+           :class:`~.lgdo.struct.Struct` in the `lh5_file` with the same `name`.
+           If there are matching fields, it errors out. If appending to a
+           ``Table`` and the size of the new column is different from the size
+           of the existing table, it errors out.
      write_start
          row in the output file (if already existing) to start overwriting
          from.
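
A sketch of the documented ``append_column`` semantics (names and data hypothetical):

    import numpy as np
    from lgdo import Array, Table, lh5

    tbl = Table(col_dict={"energy": Array(np.array([1, 2, 3]))})
    lh5.write(tbl, "tbl", "file.lh5", wo_mode="write_safe")

    # append a new field: it must not clash with existing fields, and for
    # tables its length must match the existing table size (3 rows here)
    col = Table(col_dict={"timestamp": Array(np.array([10, 20, 30]))})
    lh5.write(col, "tbl", "file.lh5", wo_mode="append_column")
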
lgdo/lh5/iterator.py CHANGED
@@ -24,7 +24,8 @@ class LH5Iterator(typing.Iterator):

      This can be used as an iterator:

-     >>> for lh5_obj, i_entry, n_rows in LH5Iterator(...):
+
+     >>> for lh5_obj in LH5Iterator(...):
      >>>     # do the thing!

      This is intended for when you are reading a large quantity of data. This
@@ -42,6 +43,8 @@ class LH5Iterator(typing.Iterator):
      In addition to accessing requested data via ``lh5_obj``, several
      properties exist to tell you where that data came from:

+     - lh5_it.current_i_entry: get the index within the entry list of the
+       first entry that is currently read
      - lh5_it.current_local_entries: get the entry numbers relative to the
        file the data came from
      - lh5_it.current_global_entries: get the entry number relative to the
@@ -49,9 +52,9 @@
      - lh5_it.current_files: get the file name corresponding to each entry
      - lh5_it.current_groups: get the group name corresponding to each entry

-     This class can also be used either for random access:
+     This class can also be used for random access:

-     >>> lh5_obj, n_rows = lh5_it.read(i_entry)
+     >>> lh5_obj = lh5_it.read(i_entry)

      to read the block of entries starting at i_entry. In case of multiple files
      or the use of an event selection, i_entry refers to a global event index
@@ -65,6 +68,8 @@
          base_path: str = "",
          entry_list: list[int] | list[list[int]] | None = None,
          entry_mask: list[bool] | list[list[bool]] | None = None,
+         i_start: int = 0,
+         n_entries: int | None = None,
          field_mask: dict[str, bool] | list[str] | tuple[str] | None = None,
          buffer_len: int = "100*MB",
          file_cache: int = 10,
@@ -89,6 +94,10 @@
          entry_mask
              mask of entries to read. If a list of arrays is provided, expect
              one for each file. Ignore if a selection list is provided.
+         i_start
+             index of first entry to start at when iterating
+         n_entries
+             number of entries to read before terminating iteration
          field_mask
              mask of which fields to read. See :meth:`LH5Store.read` for
              more details.
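
A sketch of iterating over a sub-range of entries with the two new parameters (file and group names hypothetical):

    from lgdo.lh5 import LH5Iterator

    # read 1000 entries starting at global entry 500, in blocks of buffer_len
    it = LH5Iterator(
        ["file0.lh5", "file1.lh5"],
        "geds/raw",
        i_start=500,
        n_entries=1000,
    )
    for lh5_obj in it:
        ...  # process the buffer
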
@@ -183,7 +192,8 @@
              msg = f"can't open any files from {lh5_files}"
              raise RuntimeError(msg)

-         self.n_rows = 0
+         self.i_start = i_start
+         self.n_entries = n_entries
          self.current_i_entry = 0
          self.next_i_entry = 0
@@ -317,14 +327,21 @@
              )
              return self.global_entry_list

-     def read(self, i_entry: int) -> tuple[LGDO, int]:
-         """Read the next local chunk of events, starting at i_entry. Return
-         the LH5 buffer and number of rows read."""
-         self.n_rows = 0
-         i_file = np.searchsorted(self.entry_map, i_entry, "right")
+     def read(self, i_entry: int, n_entries: int | None = None) -> LGDO:
+         """Read the next local chunk of events, starting at entry."""
+         self.lh5_buffer.resize(0)
+
+         if n_entries is None:
+             n_entries = self.buffer_len
+         elif n_entries == 0:
+             return self.lh5_buffer
+         elif n_entries > self.buffer_len:
+             msg = "n_entries cannot be larger than buffer_len"
+             raise ValueError(msg)

          # if file hasn't been opened yet, search through files
          # sequentially until we find the right one
+         i_file = np.searchsorted(self.entry_map, i_entry, "right")
          if i_file < len(self.lh5_files) and self.entry_map[i_file] == np.iinfo("q").max:
              while i_file < len(self.lh5_files) and i_entry >= self._get_file_cumentries(
                  i_file
@@ -332,10 +349,10 @@
                  i_file += 1

          if i_file == len(self.lh5_files):
-             return (self.lh5_buffer, self.n_rows)
+             return self.lh5_buffer
          local_i_entry = i_entry - self._get_file_cumentries(i_file - 1)

-         while self.n_rows < self.buffer_len and i_file < len(self.file_map):
+         while len(self.lh5_buffer) < n_entries and i_file < len(self.file_map):
              # Loop through files
              local_idx = self.get_file_entrylist(i_file)
              if local_idx is not None and len(local_idx) == 0:
@@ -344,18 +361,17 @@
                  continue

              i_local = local_i_entry if local_idx is None else local_idx[local_i_entry]
-             self.lh5_buffer, n_rows = self.lh5_st.read(
+             self.lh5_buffer = self.lh5_st.read(
                  self.groups[i_file],
                  self.lh5_files[i_file],
                  start_row=i_local,
-                 n_rows=self.buffer_len - self.n_rows,
+                 n_rows=n_entries - len(self.lh5_buffer),
                  idx=local_idx,
                  field_mask=self.field_mask,
                  obj_buf=self.lh5_buffer,
-                 obj_buf_start=self.n_rows,
+                 obj_buf_start=len(self.lh5_buffer),
              )

-             self.n_rows += n_rows
              i_file += 1
              local_i_entry = 0
@@ -364,7 +380,7 @@
          if self.friend is not None:
              self.friend.read(i_entry)

-         return (self.lh5_buffer, self.n_rows)
+         return self.lh5_buffer

      def reset_field_mask(self, mask):
          """Replaces the field mask of this iterator and any friends with mask"""
@@ -375,7 +391,7 @@
      @property
      def current_local_entries(self) -> NDArray[int]:
          """Return list of local file entries in buffer"""
-         cur_entries = np.zeros(self.n_rows, dtype="int32")
+         cur_entries = np.zeros(len(self.lh5_buffer), dtype="int32")
          i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
          file_start = self._get_file_cumentries(i_file - 1)
          i_local = self.current_i_entry - file_start
@@ -402,7 +418,7 @@
      @property
      def current_global_entries(self) -> NDArray[int]:
          """Return list of global entries in buffer"""
-         cur_entries = np.zeros(self.n_rows, dtype="int32")
+         cur_entries = np.zeros(len(self.lh5_buffer), dtype="int32")
          i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
          file_start = self._get_file_cumentries(i_file - 1)
          i_local = self.current_i_entry - file_start
@@ -433,7 +449,7 @@
      @property
      def current_files(self) -> NDArray[str]:
          """Return list of file names for entries in buffer"""
-         cur_files = np.zeros(self.n_rows, dtype=object)
+         cur_files = np.zeros(len(self.lh5_buffer), dtype=object)
          i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
          file_start = self._get_file_cumentries(i_file - 1)
          i_local = self.current_i_entry - file_start
@@ -455,7 +471,7 @@
      @property
      def current_groups(self) -> NDArray[str]:
          """Return list of group names for entries in buffer"""
-         cur_groups = np.zeros(self.n_rows, dtype=object)
+         cur_groups = np.zeros(len(self.lh5_buffer), dtype=object)
          i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
          file_start = self._get_file_cumentries(i_file - 1)
          i_local = self.current_i_entry - file_start
@@ -485,14 +501,19 @@
      def __iter__(self) -> typing.Iterator:
          """Loop through entries in blocks of size buffer_len."""
          self.current_i_entry = 0
-         self.next_i_entry = 0
+         self.next_i_entry = self.i_start
          return self

      def __next__(self) -> tuple[LGDO, int, int]:
-         """Read next buffer_len entries and return lh5_table, iterator entry
-         and n_rows read."""
-         buf, n_rows = self.read(self.next_i_entry)
-         self.next_i_entry = self.current_i_entry + n_rows
-         if n_rows == 0:
+         """Read next buffer_len entries and return lh5_table and iterator entry."""
+         n_entries = self.n_entries
+         if n_entries is not None:
+             n_entries = min(
+                 self.buffer_len, n_entries + self.i_start - self.next_i_entry
+             )
+
+         buf = self.read(self.next_i_entry, n_entries)
+         if len(buf) == 0:
              raise StopIteration
-         return (buf, self.current_i_entry, n_rows)
+         self.next_i_entry = self.current_i_entry + len(buf)
+         return buf
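
Put together, the iteration protocol changes from yielding a 3-tuple to yielding the buffer alone; the entry index and row count are now recovered from the iterator state. A migration sketch (`files` and the group name are hypothetical):

    it = LH5Iterator(files, "geds/raw")

    # 1.11.6: for lh5_obj, i_entry, n_rows in it: ...
    # 1.11.8:
    for lh5_obj in it:
        i_entry = it.current_i_entry  # index of first entry in this block
        n_rows = len(lh5_obj)         # rows actually read into the buffer
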
lgdo/lh5/store.py CHANGED
@@ -5,21 +5,20 @@ HDF5 files.

  from __future__ import annotations

- import bisect
  import logging
- import os
  import sys
  from collections import OrderedDict
  from collections.abc import Mapping, Sequence
  from inspect import signature
+ from pathlib import Path
  from typing import Any

  import h5py
- import numpy as np
  from numpy.typing import ArrayLike

  from .. import types
  from . import _serializers, utils
+ from .core import read

  log = logging.getLogger(__name__)
@@ -93,16 +92,16 @@
              return self.files[lh5_file]

          if self.base_path != "":
-             full_path = os.path.join(self.base_path, lh5_file)
+             full_path = Path(self.base_path) / lh5_file
          else:
-             full_path = lh5_file
+             full_path = Path(lh5_file)

-         file_exists = os.path.exists(full_path)
+         file_exists = full_path.exists()
          if mode != "r":
-             directory = os.path.dirname(full_path)
-             if directory != "" and not os.path.exists(directory):
+             directory = full_path.parent
+             if directory != "" and not full_path.parent.exists():
                  log.debug(f"making path {directory}")
-                 os.makedirs(directory)
+                 directory.mkdir(parents=True, exist_ok=True)

          if mode == "r" and not file_exists:
              msg = f"file {full_path} not found"
@@ -155,7 +154,7 @@
          """Returns an LH5 object appropriate for use as a pre-allocated buffer
          in a read loop. Sets size to `size` if object has a size.
          """
-         obj, n_rows = self.read(name, lh5_file, n_rows=0, field_mask=field_mask)
+         obj = self.read(name, lh5_file, n_rows=0, field_mask=field_mask)
          if hasattr(obj, "resize") and size is not None:
              obj.resize(new_size=size)
          return obj
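
With the tuple unpacking gone from `get_buffer`, a pre-allocated read loop under the new single-return API might look like this (a sketch; names and the end-of-data behavior as assumed here, with the buffer resized to zero length once no rows remain):

    from lgdo.lh5 import LH5Store

    store = LH5Store()
    buf = store.get_buffer("geds/raw", "file.lh5", size=1000)

    start = 0
    while True:
        # read() resizes buf to the number of rows actually read
        buf = store.read(
            "geds/raw", "file.lh5", start_row=start, n_rows=1000, obj_buf=buf
        )
        if len(buf) == 0:
            break
        start += len(buf)
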
@@ -182,72 +181,20 @@
          """
          # grab files from store
          if isinstance(lh5_file, (str, h5py.File)):
-             lh5_obj = self.gimme_file(lh5_file, "r", **file_kwargs)[name]
+             h5f = self.gimme_file(lh5_file, "r", **file_kwargs)
          else:
-             lh5_files = list(lh5_file)
-             n_rows_read = 0
-
-             for i, h5f in enumerate(lh5_files):
-                 if (
-                     isinstance(idx, (list, tuple))
-                     and len(idx) > 0
-                     and not np.isscalar(idx[0])
-                 ):
-                     # a list of lists: must be one per file
-                     idx_i = idx[i]
-                 elif idx is not None:
-                     # make idx a proper tuple if it's not one already
-                     if not (isinstance(idx, tuple) and len(idx) == 1):
-                         idx = (idx,)
-                     # idx is a long continuous array
-                     n_rows_i = utils.read_n_rows(name, h5f)
-                     # find the length of the subset of idx that contains indices
-                     # that are less than n_rows_i
-                     n_rows_to_read_i = bisect.bisect_left(idx[0], n_rows_i)
-                     # now split idx into idx_i and the remainder
-                     idx_i = np.array(idx[0])[:n_rows_to_read_i]
-                     idx = np.array(idx[0])[n_rows_to_read_i:] - n_rows_i
-                 else:
-                     idx_i = None
-                     n_rows_i = n_rows - n_rows_read
-
-                 obj_buf, n_rows_read_i = self.read(
-                     name,
-                     h5f,
-                     start_row,
-                     n_rows_i,
-                     idx_i,
-                     use_h5idx,
-                     field_mask,
-                     obj_buf,
-                     obj_buf_start,
-                     decompress,
-                 )
-
-                 n_rows_read += n_rows_read_i
-                 if n_rows_read >= n_rows or obj_buf is None:
-                     return obj_buf, n_rows_read
-                 start_row = 0
-                 obj_buf_start += n_rows_read_i
-             return obj_buf, n_rows_read
-
-         if isinstance(idx, (list, tuple)) and len(idx) > 0 and not np.isscalar(idx[0]):
-             idx = idx[0]
-         if isinstance(idx, np.ndarray) and idx.dtype == np.dtype("?"):
-             idx = np.where(idx)[0]
-
-         return _serializers._h5_read_lgdo(
-             lh5_obj.id,
-             lh5_obj.file.filename,
-             lh5_obj.name,
-             start_row=start_row,
-             n_rows=n_rows,
-             idx=idx,
-             use_h5idx=use_h5idx,
-             field_mask=field_mask,
-             obj_buf=obj_buf,
-             obj_buf_start=obj_buf_start,
-             decompress=decompress,
+             h5f = [self.gimme_file(f, "r", **file_kwargs) for f in lh5_file]
+         return read(
+             name,
+             h5f,
+             start_row,
+             n_rows,
+             idx,
+             use_h5idx,
+             field_mask,
+             obj_buf,
+             obj_buf_start,
+             decompress,
          )

      def write(
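
With this hunk, `LH5Store.read` becomes a thin wrapper around the module-level `lgdo.lh5.read`, which now owns the multi-file concatenation logic. A sketch of the multi-file call it enables (file names hypothetical):

    from lgdo.lh5 import LH5Store

    store = LH5Store()
    # rows are read from file0 first, then file1, into a single object
    obj = store.read("geds/raw", ["file0.lh5", "file1.lh5"], n_rows=5000)
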