legend_pydataobj-1.11.8-py3-none-any.whl → legend_pydataobj-1.11.11-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
legend_pydataobj-1.11.8.dist-info/METADATA → legend_pydataobj-1.11.11.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: legend_pydataobj
- Version: 1.11.8
+ Version: 1.11.11
  Summary: LEGEND Python Data Objects
  Author: The LEGEND Collaboration
  Maintainer: The LEGEND Collaboration
legend_pydataobj-1.11.8.dist-info/RECORD → legend_pydataobj-1.11.11.dist-info/RECORD CHANGED
@@ -1,8 +1,9 @@
- legend_pydataobj-1.11.8.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
- lgdo/__init__.py,sha256=fkRv79kdtBasw31gPVK9SdLQ2vEEajTV2t3UPDvFg9o,3206
- lgdo/_version.py,sha256=L3yqa8Aseny4agSopGdx4D8Sk3htDn12nBlc9W8wWes,513
+ legend_pydataobj-1.11.11.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+ lgdo/__init__.py,sha256=QMYK9HhoMi0pbahPN8mPD18gyTxscFgo7QKfCxVhy-0,3196
+ lgdo/_version.py,sha256=HzpeLL9TNpOYOyQgpvUDsI17SsmttCLNA6_o2M0opbs,515
  lgdo/cli.py,sha256=s_EWTBWW76l7zWb6gaTSTjiT-0RzzcYEmjeFEQCVxfk,4647
  lgdo/lgdo_utils.py,sha256=6a2YWEwpyEMXlAyTHZMO01aqxy6SxJzPZkGNWKNWuS0,2567
+ lgdo/lh5_store.py,sha256=5BzbJA9sLcqjp8bJDc2olwOiw0VS6rmfg3cfh1kQkRY,8512
  lgdo/logging.py,sha256=82wIOj7l7xr3WYyeHdpSXbbjzHJsy-uRyKYUYx2vMfQ,1003
  lgdo/units.py,sha256=VQYME86_ev9S7Fq8RyCOQNqYr29MphTTYemmEouZafk,161
  lgdo/utils.py,sha256=WRTmXnaQ-h2hVxwJ27qiOigdsD3DHcaDrdDjvupCuZU,3940
@@ -12,19 +13,19 @@ lgdo/compression/generic.py,sha256=tF3UhLJbUDcovLxpIzgQRxFSjZ5Fz3uDRy9kI4mFntQ,2
  lgdo/compression/radware.py,sha256=GcNTtjuyL7VBBqziUBmSqNXuhqy1bJJgvcyvyumPtrc,23839
  lgdo/compression/utils.py,sha256=W2RkBrxPpXlat84dnU9Ad7d_tTws0irtGl7O1dNWjnk,1140
  lgdo/compression/varlen.py,sha256=6ZZUItyoOfygDdE0DyoISeFZfqdbH6xl7T0eclfarzg,15127
- lgdo/lh5/__init__.py,sha256=UTzKGmpgFoHwVB_yNULvJsHD_uQQGl-R87l-3QBkh7w,773
- lgdo/lh5/concat.py,sha256=BZCgK7TWPKK8fMmha8K83d3bC31FVO1b5LOW7x-Ru1s,6186
- lgdo/lh5/core.py,sha256=3o6JsX6aNkMa3plX96a4vG7LWmfco33OuUzV_mMFStQ,13626
- lgdo/lh5/datatype.py,sha256=O_7BqOlX8PFMyG0ppkfUT5aps5HEqX0bpuKcJO3jhu0,1691
+ lgdo/lh5/__init__.py,sha256=y1XE_mpFWwamrl7WVjAVSVB25X4PrEfdVXSneSQEmlQ,825
+ lgdo/lh5/concat.py,sha256=5nO7dNSb0UEP9rZiWGTKH5Cfwsm5LSm3tBJM4Kd70u0,6336
+ lgdo/lh5/core.py,sha256=HT50rolOtTijgaGFskRgzoRbC0w-kxrRS2v9O5Q9Ugo,14067
+ lgdo/lh5/datatype.py,sha256=ry3twFaosuBoskiTKqtBYRMk9PQAf403593xKaItfog,1827
  lgdo/lh5/exceptions.py,sha256=3kj8avXl4eBGvebl3LG12gJEmw91W0T8PYR0AfvUAyM,1211
- lgdo/lh5/iterator.py,sha256=1ob9B7Bf3ioGCtZkUZoL6ibTxAwLf4ld8_33ghVVEa4,20498
- lgdo/lh5/store.py,sha256=qkBm3gPbr1R2UlQpUuDR5sGRMzpYJBWFL8fDIry6tmQ,8474
- lgdo/lh5/tools.py,sha256=drtJWHY82wCFuFr6LVVnm2AQgs_wZuFmAvyOB4tcOHs,6431
- lgdo/lh5/utils.py,sha256=f2H7H1D-RfDN3g_YrVDQEPaHevn5yDJFA-uznK9cgx8,6336
+ lgdo/lh5/iterator.py,sha256=ZaBBnmuNIjinwO0JUY55wLxX8Om9rVRRzXBC5uHmSKM,19772
+ lgdo/lh5/store.py,sha256=3wAaQDd1Zmo0_bQ9DbB-FbKS4Uy_Tb642qKHXtZpSw4,10643
+ lgdo/lh5/tools.py,sha256=T9CgHA8A3_tVBMtiNJ6hATQKhdqI61m3cX4p2wGKc6c,9937
+ lgdo/lh5/utils.py,sha256=ioz8DlyXZsejwnU2qYdIccdHcF12H62jgLkZsiDOLSM,6243
  lgdo/lh5/_serializers/__init__.py,sha256=eZzxMp1SeZWG0PkEXUiCz3XyprQ8EmelHUmJogC8xYE,1263
  lgdo/lh5/_serializers/read/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  lgdo/lh5/_serializers/read/array.py,sha256=uWfMCihfAmW2DE2ewip2qCK_kvQC_mb2zvOv26uzijc,1000
- lgdo/lh5/_serializers/read/composite.py,sha256=UvkZHEhf0V7SFLxzF52eyP68hU0guGOLqosrfmIfeys,11729
+ lgdo/lh5/_serializers/read/composite.py,sha256=yTm5dfTgkIL7eG9iZXxhdiRhG04cQLd_hybP4wmxCJE,11809
  lgdo/lh5/_serializers/read/encoded.py,sha256=Q98c08d8LkZq2AlY4rThYECVaEqwbv4T2Urn7TGnsyE,4130
  lgdo/lh5/_serializers/read/ndarray.py,sha256=lFCXD6bSzmMOH7cVmvRYXakkfMCI8EoqTPNONRJ1F0s,3690
  lgdo/lh5/_serializers/read/scalar.py,sha256=kwhWm1T91pXf86CqtUUD8_qheSR92gXZrQVtssV5YCg,922
@@ -32,24 +33,24 @@ lgdo/lh5/_serializers/read/utils.py,sha256=YfSqPO-83A1XvhhuULxQ0Qz2A5ODa3sb7ApNx
  lgdo/lh5/_serializers/read/vector_of_vectors.py,sha256=765P8mElGArAaEPkHTAUXFQ47t1_3-3BQAete0LckBQ,7207
  lgdo/lh5/_serializers/write/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  lgdo/lh5/_serializers/write/array.py,sha256=66DKnW2yqIBlUGNBPWcE-m4W0B2-nTKusDHGX9m6GY0,3223
- lgdo/lh5/_serializers/write/composite.py,sha256=wiq9O3Cb08wrAm14L7Jz69ppL7SnYEDHgW6pJtY8aBI,9425
+ lgdo/lh5/_serializers/write/composite.py,sha256=eEfisBAxpF1Q8v4AbORbBQyxg0p5ugMo9cBjicOC5KI,9979
  lgdo/lh5/_serializers/write/scalar.py,sha256=JPt_fcdTKOSFp5hfJdcKIfK4hxhcD8vhOlvDF-7btQ8,763
  lgdo/lh5/_serializers/write/vector_of_vectors.py,sha256=puGQX9XF5P_5DVbm_Cc6TvPrsDywgBLSYtkqFNltbB4,3493
  lgdo/types/__init__.py,sha256=DNfOErPiAZg-7Gygkp6ZKAi20Yrm1mfderZHvKo1Y4s,821
- lgdo/types/array.py,sha256=vxViJScqKw4zGUrrIOuuU_9Y0oTfOkEEhs0TOyUYjwI,9284
+ lgdo/types/array.py,sha256=sUxh1CNCaefrnybt5qdjmmMpVQa_RqFxUv1tJ_pyBbc,6537
  lgdo/types/arrayofequalsizedarrays.py,sha256=DOGJiTmc1QCdm7vLbE6uIRXoMPtt8uuCfmwQawgWf5s,4949
- lgdo/types/encoded.py,sha256=_e8u_BPfpjJbLnEdyTo9QG3kbNsGj0BN4gjdj3L1ndw,15640
+ lgdo/types/encoded.py,sha256=JW4U5ow7KLMzhKnmhdnxbC3SZJAs4bOEDZWKG4KY1uU,15293
  lgdo/types/fixedsizearray.py,sha256=7RjUwTz1bW0pcrdy27JlfrXPAuOU89Kj7pOuSUCojK8,1527
- lgdo/types/histogram.py,sha256=Jz1lLH56BfYnmcUhxUHK1h2wLDQ0Abgyd-6LznU-3-k,19979
- lgdo/types/lgdo.py,sha256=21YNtJCHnSO3M60rjsAdbMO5crDjL_0BtuFpudZ2xvU,4500
+ lgdo/types/histogram.py,sha256=y6j2VDuGYYnLy7WI4J90ApS0PAwic4kCpouZPX09Nus,19974
+ lgdo/types/lgdo.py,sha256=RQ2P70N7IWMBDnLLuJI3sm6zQTIKyOMSsKZtBNzmE90,2928
  lgdo/types/scalar.py,sha256=c5Es2vyDqyWTPV6mujzfIzMpC1jNWkEIcvYyWQUxH3Q,1933
- lgdo/types/struct.py,sha256=Q0OWLVd4B0ciLb8t6VsxU3MPbmGLZ7WfQNno1lSQS0Q,4918
- lgdo/types/table.py,sha256=FkWesoEA9bmGGSW8Ewig1Zs77ffUoR_nggfYSmkWpjU,20079
- lgdo/types/vectorofvectors.py,sha256=GbAKV_ehXN4XdWSwnmKS_ErCiudRetcH_3wo7iDrVjw,26854
- lgdo/types/vovutils.py,sha256=LW3ZcwECxVYxxcFadAtY3nnK-9-rk8Xbg_m8hY30lo4,10708
- lgdo/types/waveformtable.py,sha256=9S_NMg894NZTGt2pLuskwH4-zQ5EbLnzWI6FVui6fXE,9827
- legend_pydataobj-1.11.8.dist-info/METADATA,sha256=qFXYWIsv8umqbHxK7ltDFwbPU_z5STG6E5CUAxZk6EY,44443
- legend_pydataobj-1.11.8.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
- legend_pydataobj-1.11.8.dist-info/entry_points.txt,sha256=0KWfnwbuwhNn0vPUqARukjp04Ca6lzfZBSirouRmk7I,76
- legend_pydataobj-1.11.8.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
- legend_pydataobj-1.11.8.dist-info/RECORD,,
+ lgdo/types/struct.py,sha256=m3pYfGfKptV8ti3wb4n1nsPKMvhjdWCFoRdR5YooZBM,6353
+ lgdo/types/table.py,sha256=VIHQOPXJHJgiCjMMb_p7EdbcCqLFSObHMdHSxC1Dm5Y,19212
+ lgdo/types/vectorofvectors.py,sha256=K8w7CZou857I9YGkeOe2uYB20gbHl4OV9xhnnJPNOjc,24665
+ lgdo/types/vovutils.py,sha256=7BWPP0BSj-92ifbCIUBcfqxG5-TS8uxujTyJJuDFI04,10302
+ lgdo/types/waveformtable.py,sha256=f2tS4f1OEoYaTM5ldCX9zmw8iSISCT3t3wS1SrPdu_o,9901
+ legend_pydataobj-1.11.11.dist-info/METADATA,sha256=31ymnKEL5SpQecBon8Pg_SuL7axx4VityFgjGpm46bM,44444
+ legend_pydataobj-1.11.11.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
+ legend_pydataobj-1.11.11.dist-info/entry_points.txt,sha256=0KWfnwbuwhNn0vPUqARukjp04Ca6lzfZBSirouRmk7I,76
+ legend_pydataobj-1.11.11.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
+ legend_pydataobj-1.11.11.dist-info/RECORD,,
lgdo/__init__.py CHANGED
@@ -45,7 +45,7 @@ browsed easily in python like any `HDF5 <https://www.hdfgroup.org>`_ file using
  from __future__ import annotations
 
  from ._version import version as __version__
- from .lh5 import LH5Iterator, ls, read, read_as, read_n_rows, show, write
+ from .lh5_store import LH5Iterator, LH5Store, load_dfs, load_nda, ls, show
  from .types import (
      LGDO,
      Array,
@@ -69,6 +69,7 @@ __all__ = [
      "FixedSizeArray",
      "Histogram",
      "LH5Iterator",
+     "LH5Store",
      "Scalar",
      "Struct",
      "Table",
@@ -76,10 +77,8 @@ __all__ = [
      "VectorOfVectors",
      "WaveformTable",
      "__version__",
+     "load_dfs",
+     "load_nda",
      "ls",
-     "read",
-     "read_as",
-     "read_n_rows",
      "show",
-     "write",
  ]
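With this revert, `read`, `read_as`, `read_n_rows` and `write` are no longer re-exported at the package root; the legacy `LH5Store`, `load_dfs` and `load_nda` names return instead. A minimal sketch of downstream usage under that assumption (the file and group names are hypothetical placeholders):

```python
import lgdo
from lgdo import lh5

# the legacy store interface is importable from the package root again
store = lgdo.LH5Store()

# "geds/raw" and "run1.lh5" are hypothetical placeholders
obj, n_rows = store.read("geds/raw", "run1.lh5")

# read()/write() are no longer at the root, but remain in lgdo.lh5
lh5.write(obj, "geds/raw", "copy.lh5", wo_mode="overwrite_file")
```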
lgdo/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE
 
- __version__ = version = '1.11.8'
- __version_tuple__ = version_tuple = (1, 11, 8)
+ __version__ = version = '1.11.11'
+ __version_tuple__ = version_tuple = (1, 11, 11)
lgdo/lh5/__init__.py CHANGED
@@ -11,7 +11,7 @@ from ._serializers.write.array import DEFAULT_HDF5_SETTINGS
  from .core import read, read_as, write
  from .iterator import LH5Iterator
  from .store import LH5Store
- from .tools import ls, show
+ from .tools import load_dfs, load_nda, ls, show
  from .utils import read_n_rows
 
  __all__ = [
@@ -19,6 +19,8 @@ __all__ = [
      "LH5Iterator",
      "LH5Store",
      "concat",
+     "load_dfs",
+     "load_nda",
      "ls",
      "read",
      "read_as",
lgdo/lh5/_serializers/read/composite.py CHANGED
@@ -353,13 +353,15 @@ def _h5_read_table(
          table = Table(col_dict=col_dict, attrs=attrs)
 
          # set (write) loc to end of tree
-         table.resize(do_warn=True)
+         table.loc = n_rows_read
          return table, n_rows_read
 
      # We have read all fields into the object buffer. Run
      # checks: All columns should be the same size. So update
      # table's size as necessary, warn if any mismatches are found
      obj_buf.resize(do_warn=True)
+     # set (write) loc to end of tree
+     obj_buf.loc = obj_buf_start + n_rows_read
 
      # check attributes
      utils.check_obj_buf_attrs(obj_buf.attrs, attrs, fname, oname)
lgdo/lh5/_serializers/write/composite.py CHANGED
@@ -1,8 +1,8 @@
  from __future__ import annotations
 
  import logging
+ import os
  from inspect import signature
- from pathlib import Path
 
  import h5py
 
@@ -53,7 +53,7 @@ def _h5_write_lgdo(
      # change any object in the file. So we use file:append for
      # write_object:overwrite.
      if not isinstance(lh5_file, h5py.File):
-         mode = "w" if wo_mode == "of" or not Path(lh5_file).exists() else "a"
+         mode = "w" if wo_mode == "of" or not os.path.exists(lh5_file) else "a"
          lh5_file = h5py.File(lh5_file, mode=mode, **file_kwargs)
 
      log.debug(
@@ -191,15 +191,26 @@ def _h5_write_struct(
 
      # In order to append a column, we need to update the
      # `struct/table{old_fields}` value in `group.attrs['datatype"]` to include
-     # the new fields. One way to do this is to override
-     # `obj.attrs["datatype"]` to include old and new fields. Then we
-     # can write the fields to the struct/table as normal.
+     # the new fields. One way to do this is to override `obj.attrs["datatype"]`
+     # to include old and new fields. Then we can write the fields to the
+     # struct/table as normal.
      if wo_mode == "ac":
+         if name not in group:
+             msg = "Cannot append column to non-existing struct on disk"
+             raise LH5EncodeError(msg, lh5_file, group, name)
+
          old_group = utils.get_h5_group(name, group)
+         if "datatype" not in old_group.attrs:
+             msg = "Cannot append column to an existing non-LGDO object on disk"
+             raise LH5EncodeError(msg, lh5_file, group, name)
+
          lgdotype = datatype.datatype(old_group.attrs["datatype"])
          fields = datatype.get_struct_fields(old_group.attrs["datatype"])
          if lgdotype is not type(obj):
-             msg = f"Trying to append columns to an object of different type {lgdotype.__name__}!={type(obj)}"
+             msg = (
+                 "Trying to append columns to an object of different "
+                 f"type {lgdotype.__name__}!={type(obj)}"
+             )
              raise LH5EncodeError(msg, lh5_file, group, name)
 
          # If the mode is `append_column`, make sure we aren't appending
@@ -230,14 +241,22 @@ def _h5_write_struct(
 
          obj.attrs["datatype"] = obj.datatype_name() + "{" + ",".join(fields) + "}"
 
+         # propagating wo_mode="ac" to nested LGDOs does not make any sense
+         wo_mode = "append"
+
+         # overwrite attributes of the existing struct
+         attrs_overwrite = True
+     else:
+         attrs_overwrite = wo_mode == "o"
+
      group = utils.get_h5_group(
          name,
          group,
          grp_attrs=obj.attrs,
-         overwrite=(wo_mode in ["o", "ac"]),
+         overwrite=attrs_overwrite,
      )
      # If the mode is overwrite, then we need to peek into the file's
-     # table's existing fields.  If we are writing a new table to the
+     # table's existing fields. If we are writing a new table to the
      # group that does not contain an old field, we should delete that
      # old field from the file
      if wo_mode == "o":
@@ -266,11 +285,9 @@ def _h5_write_struct(
          else:
              obj_fld = obj[field]
 
-         # Convert keys to string for dataset names
-         f = str(field)
          _h5_write_lgdo(
              obj_fld,
-             f,
+             str(field),
              lh5_file,
              group=group,
              start_row=start_row,
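The two new guards turn misuse of `wo_mode="ac"` (append column) into explicit `LH5EncodeError`s instead of obscure HDF5 failures. A hedged sketch of the resulting behavior, with hypothetical object and file names:

```python
import numpy as np
from lgdo import lh5
from lgdo.lh5.exceptions import LH5EncodeError
from lgdo.types import Array, Table

# write a table, then append a genuinely new column to it on disk
tbl = Table(col_dict={"a": Array(np.arange(3))})
lh5.write(tbl, "tbl", "out.lh5", wo_mode="overwrite_file")
lh5.write(Table(col_dict={"b": Array(np.arange(3))}), "tbl", "out.lh5", wo_mode="append_column")

# appending to a name that is absent from the file now fails loudly
try:
    lh5.write(Table(col_dict={"c": Array(np.arange(3))}), "missing", "out.lh5", wo_mode="append_column")
except LH5EncodeError as err:
    print(err)
```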
lgdo/lh5/concat.py CHANGED
@@ -76,7 +76,7 @@ def _get_lgdos(file, obj_list):
              continue
 
          # read as little as possible
-         obj = store.read(current, h5f0, n_rows=1)
+         obj, _ = store.read(current, h5f0, n_rows=1)
          if isinstance(obj, (Table, Array, VectorOfVectors)):
              lgdos.append(current)
 
@@ -139,6 +139,12 @@ def _remove_nested_fields(lgdos: dict, obj_list: list):
          _inplace_table_filter(key, val, obj_list)
 
 
+ def _slice(obj, n_rows):
+     ak_obj = obj.view_as("ak")[:n_rows]
+     obj_type = type(obj)
+     return obj_type(ak_obj)
+
+
  def lh5concat(
      lh5_files: list,
      output: str,
@@ -180,8 +186,8 @@ def lh5concat(
      # loop over lgdo objects
      for lgdo in lgdos:
          # iterate over the files
-         for lh5_obj in LH5Iterator(lh5_files, lgdo):
-             data = {lgdo: lh5_obj}
+         for lh5_obj, _, n_rows in LH5Iterator(lh5_files, lgdo):
+             data = {lgdo: _slice(lh5_obj, n_rows)}
 
              # remove the nested fields
              _remove_nested_fields(data, obj_list)
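`lh5concat` is the main consumer of the reverted iterator API: it unpacks the `(buffer, i_entry, n_rows)` tuple and, via the new `_slice` helper, trims each buffer to the rows actually read before writing. Minimal usage sketch (file names hypothetical):

```python
from lgdo.lh5.concat import lh5concat

# concatenate every eligible Table/Array/VectorOfVectors
# found in the input files into a single output file
lh5concat(["run1.lh5", "run2.lh5"], "merged.lh5")
```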
lgdo/lh5/core.py CHANGED
@@ -4,7 +4,6 @@ import bisect
  import inspect
  import sys
  from collections.abc import Mapping, Sequence
- from contextlib import suppress
  from typing import Any
 
  import h5py
@@ -93,7 +92,8 @@ def read(
          will be set to ``True``, while the rest will default to ``False``.
      obj_buf
          Read directly into memory provided in `obj_buf`. Note: the buffer
-         will be resized to accommodate the data retrieved.
+         will be expanded to accommodate the data requested. To maintain the
+         buffer length, send in ``n_rows = len(obj_buf)``.
      obj_buf_start
          Start location in ``obj_buf`` for read. For concatenating data to
          array-like objects.
@@ -106,25 +106,25 @@ def read(
 
      Returns
      -------
-     object
-         the read-out object
+     (object, n_rows_read)
+         `object` is the read-out object `n_rows_read` is the number of rows
+         successfully read out. Essential for arrays when the amount of data
+         is smaller than the object buffer. For scalars and structs
+         `n_rows_read` will be``1``. For tables it is redundant with
+         ``table.loc``. If `obj_buf` is ``None``, only `object` is returned.
      """
      if isinstance(lh5_file, h5py.File):
          lh5_obj = lh5_file[name]
      elif isinstance(lh5_file, str):
          lh5_file = h5py.File(lh5_file, mode="r", locking=locking)
-         try:
-             lh5_obj = lh5_file[name]
-         except KeyError as ke:
-             err = f"Object {name} not found in file {lh5_file.filename}"
-             raise KeyError(err) from ke
+         lh5_obj = lh5_file[name]
      else:
-         if obj_buf is not None:
-             obj_buf.resize(obj_buf_start)
-         else:
-             obj_buf_start = 0
+         lh5_files = list(lh5_file)
+
+         n_rows_read = 0
+         obj_buf_is_new = False
 
-         for i, h5f in enumerate(lh5_file):
+         for i, h5f in enumerate(lh5_files):
              if (
                  isinstance(idx, (list, tuple))
                  and len(idx) > 0
@@ -146,26 +146,33 @@ def read(
                  idx = np.array(idx[0])[n_rows_to_read_i:] - n_rows_i
              else:
                  idx_i = None
+             n_rows_i = n_rows - n_rows_read
 
-             obj_buf_start_i = len(obj_buf) if obj_buf else 0
-             n_rows_i = n_rows - (obj_buf_start_i - obj_buf_start)
-
-             obj_buf = read(
+             obj_ret = read(
                  name,
                  h5f,
-                 start_row if i == 0 else 0,
+                 start_row,
                  n_rows_i,
                  idx_i,
                  use_h5idx,
                  field_mask,
                  obj_buf,
-                 obj_buf_start_i,
+                 obj_buf_start,
                  decompress,
              )
+             if isinstance(obj_ret, tuple):
+                 obj_buf, n_rows_read_i = obj_ret
+                 obj_buf_is_new = True
+             else:
+                 obj_buf = obj_ret
+                 n_rows_read_i = len(obj_buf)
 
-             if obj_buf is None or (len(obj_buf) - obj_buf_start) >= n_rows:
-                 return obj_buf
-         return obj_buf
+             n_rows_read += n_rows_read_i
+             if n_rows_read >= n_rows or obj_buf is None:
+                 return obj_buf, n_rows_read
+             start_row = 0
+             obj_buf_start += n_rows_read_i
+         return obj_buf if obj_buf_is_new else (obj_buf, n_rows_read)
 
      if isinstance(idx, (list, tuple)) and len(idx) > 0 and not np.isscalar(idx[0]):
          idx = idx[0]
@@ -185,10 +192,8 @@ def read(
          obj_buf_start=obj_buf_start,
          decompress=decompress,
      )
-     with suppress(AttributeError):
-         obj.resize(obj_buf_start + n_rows_read)
 
-     return obj
+     return obj if obj_buf is None else (obj, n_rows_read)
 
 
  def write(
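Net effect on the module-level `read`: without `obj_buf` it still returns just the LGDO, but with a buffer it again returns `(object, n_rows_read)` and no longer resizes the buffer behind the caller's back. Sketch, with hypothetical names:

```python
from lgdo import lh5

# plain read: returns only the LGDO
obj = lh5.read("geds/raw", "run1.lh5")

# buffered read: returns (LGDO, n_rows_read); pass n_rows=len(obj)
# to keep the buffer length fixed, as the docstring now advises
obj, n_rows = lh5.read("geds/raw", "run2.lh5", obj_buf=obj, n_rows=len(obj))
```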
lgdo/lh5/datatype.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
 
  import re
  from collections import OrderedDict
+ from itertools import permutations as perm
 
  from .. import types as lgdo
 
@@ -14,7 +15,10 @@ _lgdo_datatype_map: dict[str, lgdo.LGDO] = OrderedDict(
          lgdo.ArrayOfEncodedEqualSizedArrays,
          r"^array_of_encoded_equalsized_arrays<1,1>\{.+\}$",
      ),
-     (lgdo.Histogram, r"^struct\{binning,weights,isdensity\}$"),
+     (
+         lgdo.Histogram,
+         rf"^struct\{{(?:{'|'.join([','.join(p) for p in perm(['binning', 'weights', 'isdensity'])])})\}}$",
+     ),
      (lgdo.Struct, r"^struct\{.*\}$"),
      (lgdo.Table, r"^table\{.*\}$"),
      (lgdo.FixedSizeArray, r"^fixedsize_array<\d+>\{.+\}$"),
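The rebuilt pattern enumerates every ordering of the three histogram fields, so histograms whose `datatype` attribute lists the fields in a different order are again recognized as `Histogram` instead of falling through to the generic `Struct` rule. The expansion can be checked standalone:

```python
import re
from itertools import permutations as perm

pattern = rf"^struct\{{(?:{'|'.join([','.join(p) for p in perm(['binning', 'weights', 'isdensity'])])})\}}$"

# six alternatives, one per field ordering
assert re.match(pattern, "struct{binning,weights,isdensity}")
assert re.match(pattern, "struct{weights,binning,isdensity}")
assert not re.match(pattern, "struct{binning,weights}")
```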
lgdo/lh5/iterator.py CHANGED
@@ -24,8 +24,7 @@ class LH5Iterator(typing.Iterator):
 
      This can be used as an iterator:
 
-
-     >>> for lh5_obj in LH5Iterator(...):
+     >>> for lh5_obj, i_entry, n_rows in LH5Iterator(...):
      >>>     # do the thing!
 
      This is intended for if you are reading a large quantity of data. This
@@ -43,8 +42,6 @@ class LH5Iterator(typing.Iterator):
      In addition to accessing requested data via ``lh5_obj``, several
      properties exist to tell you where that data came from:
 
-     - lh5_it.current_i_entry: get the index within the entry list of the
-       first entry that is currently read
      - lh5_it.current_local_entries: get the entry numbers relative to the
        file the data came from
      - lh5_it.current_global_entries: get the entry number relative to the
@@ -52,9 +49,9 @@ class LH5Iterator(typing.Iterator):
      - lh5_it.current_files: get the file name corresponding to each entry
      - lh5_it.current_groups: get the group name corresponding to each entry
 
-     This class can also be used for random access:
+     This class can also be used either for random access:
 
-     >>> lh5_obj = lh5_it.read(i_entry)
+     >>> lh5_obj, n_rows = lh5_it.read(i_entry)
 
      to read the block of entries starting at i_entry. In case of multiple files
      or the use of an event selection, i_entry refers to a global event index
@@ -68,8 +65,6 @@ class LH5Iterator(typing.Iterator):
          base_path: str = "",
          entry_list: list[int] | list[list[int]] | None = None,
          entry_mask: list[bool] | list[list[bool]] | None = None,
-         i_start: int = 0,
-         n_entries: int | None = None,
          field_mask: dict[str, bool] | list[str] | tuple[str] | None = None,
          buffer_len: int = "100*MB",
          file_cache: int = 10,
@@ -94,10 +89,6 @@ class LH5Iterator(typing.Iterator):
          entry_mask
              mask of entries to read. If a list of arrays is provided, expect
              one for each file. Ignore if a selection list is provided.
-         i_start
-             index of first entry to start at when iterating
-         n_entries
-             number of entries to read before terminating iteration
          field_mask
              mask of which fields to read. See :meth:`LH5Store.read` for
              more details.
@@ -192,8 +183,7 @@ class LH5Iterator(typing.Iterator):
              msg = f"can't open any files from {lh5_files}"
              raise RuntimeError(msg)
 
-         self.i_start = i_start
-         self.n_entries = n_entries
+         self.n_rows = 0
          self.current_i_entry = 0
          self.next_i_entry = 0
 
@@ -327,21 +317,14 @@ class LH5Iterator(typing.Iterator):
              )
          return self.global_entry_list
 
-     def read(self, i_entry: int, n_entries: int | None = None) -> LGDO:
-         "Read the nextlocal chunk of events, starting at entry."
-         self.lh5_buffer.resize(0)
-
-         if n_entries is None:
-             n_entries = self.buffer_len
-         elif n_entries == 0:
-             return self.lh5_buffer
-         elif n_entries > self.buffer_len:
-             msg = "n_entries cannot be larger than buffer_len"
-             raise ValueError(msg)
+     def read(self, i_entry: int) -> tuple[LGDO, int]:
+         """Read the nextlocal chunk of events, starting at i_entry. Return the
+         LH5 buffer and number of rows read."""
+         self.n_rows = 0
+         i_file = np.searchsorted(self.entry_map, i_entry, "right")
 
          # if file hasn't been opened yet, search through files
          # sequentially until we find the right one
-         i_file = np.searchsorted(self.entry_map, i_entry, "right")
          if i_file < len(self.lh5_files) and self.entry_map[i_file] == np.iinfo("q").max:
              while i_file < len(self.lh5_files) and i_entry >= self._get_file_cumentries(
                  i_file
@@ -349,10 +332,10 @@ class LH5Iterator(typing.Iterator):
                  i_file += 1
 
          if i_file == len(self.lh5_files):
-             return self.lh5_buffer
+             return (self.lh5_buffer, self.n_rows)
          local_i_entry = i_entry - self._get_file_cumentries(i_file - 1)
 
-         while len(self.lh5_buffer) < n_entries and i_file < len(self.file_map):
+         while self.n_rows < self.buffer_len and i_file < len(self.file_map):
              # Loop through files
              local_idx = self.get_file_entrylist(i_file)
              if local_idx is not None and len(local_idx) == 0:
@@ -361,17 +344,18 @@ class LH5Iterator(typing.Iterator):
                  continue
 
              i_local = local_i_entry if local_idx is None else local_idx[local_i_entry]
-             self.lh5_buffer = self.lh5_st.read(
+             self.lh5_buffer, n_rows = self.lh5_st.read(
                  self.groups[i_file],
                  self.lh5_files[i_file],
                  start_row=i_local,
-                 n_rows=n_entries - len(self.lh5_buffer),
+                 n_rows=self.buffer_len - self.n_rows,
                  idx=local_idx,
                  field_mask=self.field_mask,
                  obj_buf=self.lh5_buffer,
-                 obj_buf_start=len(self.lh5_buffer),
+                 obj_buf_start=self.n_rows,
              )
 
+             self.n_rows += n_rows
              i_file += 1
              local_i_entry = 0
 
@@ -380,7 +364,7 @@ class LH5Iterator(typing.Iterator):
          if self.friend is not None:
              self.friend.read(i_entry)
 
-         return self.lh5_buffer
+         return (self.lh5_buffer, self.n_rows)
 
      def reset_field_mask(self, mask):
          """Replaces the field mask of this iterator and any friends with mask"""
@@ -391,7 +375,7 @@ class LH5Iterator(typing.Iterator):
      @property
      def current_local_entries(self) -> NDArray[int]:
          """Return list of local file entries in buffer"""
-         cur_entries = np.zeros(len(self.lh5_buffer), dtype="int32")
+         cur_entries = np.zeros(self.n_rows, dtype="int32")
          i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
          file_start = self._get_file_cumentries(i_file - 1)
          i_local = self.current_i_entry - file_start
@@ -418,7 +402,7 @@ class LH5Iterator(typing.Iterator):
      @property
      def current_global_entries(self) -> NDArray[int]:
          """Return list of local file entries in buffer"""
-         cur_entries = np.zeros(len(self.lh5_buffer), dtype="int32")
+         cur_entries = np.zeros(self.n_rows, dtype="int32")
          i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
          file_start = self._get_file_cumentries(i_file - 1)
          i_local = self.current_i_entry - file_start
@@ -449,7 +433,7 @@ class LH5Iterator(typing.Iterator):
      @property
      def current_files(self) -> NDArray[str]:
          """Return list of file names for entries in buffer"""
-         cur_files = np.zeros(len(self.lh5_buffer), dtype=object)
+         cur_files = np.zeros(self.n_rows, dtype=object)
          i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
          file_start = self._get_file_cumentries(i_file - 1)
          i_local = self.current_i_entry - file_start
@@ -471,7 +455,7 @@ class LH5Iterator(typing.Iterator):
      @property
      def current_groups(self) -> NDArray[str]:
          """Return list of group names for entries in buffer"""
-         cur_groups = np.zeros(len(self.lh5_buffer), dtype=object)
+         cur_groups = np.zeros(self.n_rows, dtype=object)
          i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
          file_start = self._get_file_cumentries(i_file - 1)
          i_local = self.current_i_entry - file_start
@@ -501,19 +485,14 @@ class LH5Iterator(typing.Iterator):
      def __iter__(self) -> typing.Iterator:
          """Loop through entries in blocks of size buffer_len."""
          self.current_i_entry = 0
-         self.next_i_entry = self.i_start
+         self.next_i_entry = 0
          return self
 
      def __next__(self) -> tuple[LGDO, int, int]:
-         """Read next buffer_len entries and return lh5_table and iterator entry."""
-         n_entries = self.n_entries
-         if n_entries is not None:
-             n_entries = min(
-                 self.buffer_len, n_entries + self.i_start - self.next_i_entry
-             )
-
-         buf = self.read(self.next_i_entry, n_entries)
-         if len(buf) == 0:
+         """Read next buffer_len entries and return lh5_table, iterator entry
+         and n_rows read."""
+         buf, n_rows = self.read(self.next_i_entry)
+         self.next_i_entry = self.current_i_entry + n_rows
+         if n_rows == 0:
              raise StopIteration
-         self.next_i_entry = self.current_i_entry + len(buf)
-         return buf
+         return (buf, self.current_i_entry, n_rows)
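Taken together, iteration returns to the three-tuple protocol, and on the final chunk the buffer may hold stale rows past `n_rows`, so callers should slice. A usage sketch with hypothetical file and group names:

```python
from lgdo.lh5 import LH5Iterator

for lh5_obj, i_entry, n_rows in LH5Iterator(["run1.lh5", "run2.lh5"], "geds/raw"):
    # only the first n_rows rows of the buffer are valid
    chunk = lh5_obj.view_as("ak")[:n_rows]
    print(i_entry, n_rows)
```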