legend-pydataobj 1.11.13__py3-none-any.whl → 1.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
legend_pydataobj-1.12.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: legend_pydataobj
-Version: 1.11.13
+Version: 1.12.0
 Summary: LEGEND Python Data Objects
 Author: The LEGEND Collaboration
 Maintainer: The LEGEND Collaboration
legend_pydataobj-1.12.0.dist-info/RECORD CHANGED
@@ -1,9 +1,8 @@
-legend_pydataobj-1.11.13.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-lgdo/__init__.py,sha256=QMYK9HhoMi0pbahPN8mPD18gyTxscFgo7QKfCxVhy-0,3196
-lgdo/_version.py,sha256=Qst-iXDl1kYXwH8uB96MoJHj4GrtPgOsKRVwqxfnymw,515
+legend_pydataobj-1.12.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+lgdo/__init__.py,sha256=fkRv79kdtBasw31gPVK9SdLQ2vEEajTV2t3UPDvFg9o,3206
+lgdo/_version.py,sha256=X7AXkrxMLYa0fUCdwZA2oOfiFkQJiuenTXzRghkc4eU,513
 lgdo/cli.py,sha256=s_EWTBWW76l7zWb6gaTSTjiT-0RzzcYEmjeFEQCVxfk,4647
 lgdo/lgdo_utils.py,sha256=6a2YWEwpyEMXlAyTHZMO01aqxy6SxJzPZkGNWKNWuS0,2567
-lgdo/lh5_store.py,sha256=5BzbJA9sLcqjp8bJDc2olwOiw0VS6rmfg3cfh1kQkRY,8512
 lgdo/logging.py,sha256=82wIOj7l7xr3WYyeHdpSXbbjzHJsy-uRyKYUYx2vMfQ,1003
 lgdo/units.py,sha256=VQYME86_ev9S7Fq8RyCOQNqYr29MphTTYemmEouZafk,161
 lgdo/utils.py,sha256=WRTmXnaQ-h2hVxwJ27qiOigdsD3DHcaDrdDjvupCuZU,3940
@@ -13,20 +12,20 @@ lgdo/compression/generic.py,sha256=tF3UhLJbUDcovLxpIzgQRxFSjZ5Fz3uDRy9kI4mFntQ,2
 lgdo/compression/radware.py,sha256=GcNTtjuyL7VBBqziUBmSqNXuhqy1bJJgvcyvyumPtrc,23839
 lgdo/compression/utils.py,sha256=W2RkBrxPpXlat84dnU9Ad7d_tTws0irtGl7O1dNWjnk,1140
 lgdo/compression/varlen.py,sha256=6ZZUItyoOfygDdE0DyoISeFZfqdbH6xl7T0eclfarzg,15127
-lgdo/lh5/__init__.py,sha256=-LtqfwKYaongsORkr3woqM2wVVcdVIIp1PI-jogmK8k,800
-lgdo/lh5/concat.py,sha256=5nO7dNSb0UEP9rZiWGTKH5Cfwsm5LSm3tBJM4Kd70u0,6336
-lgdo/lh5/core.py,sha256=HT50rolOtTijgaGFskRgzoRbC0w-kxrRS2v9O5Q9Ugo,14067
+lgdo/lh5/__init__.py,sha256=smHTawINIiogHNfYJq3aPvtxleTnBMdPADRCdc1wea8,748
+lgdo/lh5/concat.py,sha256=BZCgK7TWPKK8fMmha8K83d3bC31FVO1b5LOW7x-Ru1s,6186
+lgdo/lh5/core.py,sha256=U0ZZk6EmojRRYFBEo_bMy7jZ3SKBU41MIsSulyFxZIU,13752
 lgdo/lh5/datatype.py,sha256=ry3twFaosuBoskiTKqtBYRMk9PQAf403593xKaItfog,1827
 lgdo/lh5/exceptions.py,sha256=3kj8avXl4eBGvebl3LG12gJEmw91W0T8PYR0AfvUAyM,1211
-lgdo/lh5/iterator.py,sha256=ZaBBnmuNIjinwO0JUY55wLxX8Om9rVRRzXBC5uHmSKM,19772
+lgdo/lh5/iterator.py,sha256=1ob9B7Bf3ioGCtZkUZoL6ibTxAwLf4ld8_33ghVVEa4,20498
 lgdo/lh5/settings.py,sha256=cmPd6ZvneAF5sFMA1qf-9g_YSSygJcQSRmZDp1_sBEU,1001
-lgdo/lh5/store.py,sha256=3wAaQDd1Zmo0_bQ9DbB-FbKS4Uy_Tb642qKHXtZpSw4,10643
-lgdo/lh5/tools.py,sha256=T9CgHA8A3_tVBMtiNJ6hATQKhdqI61m3cX4p2wGKc6c,9937
-lgdo/lh5/utils.py,sha256=ioz8DlyXZsejwnU2qYdIccdHcF12H62jgLkZsiDOLSM,6243
+lgdo/lh5/store.py,sha256=qkBm3gPbr1R2UlQpUuDR5sGRMzpYJBWFL8fDIry6tmQ,8474
+lgdo/lh5/tools.py,sha256=drtJWHY82wCFuFr6LVVnm2AQgs_wZuFmAvyOB4tcOHs,6431
+lgdo/lh5/utils.py,sha256=f2H7H1D-RfDN3g_YrVDQEPaHevn5yDJFA-uznK9cgx8,6336
 lgdo/lh5/_serializers/__init__.py,sha256=eZzxMp1SeZWG0PkEXUiCz3XyprQ8EmelHUmJogC8xYE,1263
 lgdo/lh5/_serializers/read/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lgdo/lh5/_serializers/read/array.py,sha256=uWfMCihfAmW2DE2ewip2qCK_kvQC_mb2zvOv26uzijc,1000
-lgdo/lh5/_serializers/read/composite.py,sha256=yTm5dfTgkIL7eG9iZXxhdiRhG04cQLd_hybP4wmxCJE,11809
+lgdo/lh5/_serializers/read/composite.py,sha256=UvkZHEhf0V7SFLxzF52eyP68hU0guGOLqosrfmIfeys,11729
 lgdo/lh5/_serializers/read/encoded.py,sha256=Q98c08d8LkZq2AlY4rThYECVaEqwbv4T2Urn7TGnsyE,4130
 lgdo/lh5/_serializers/read/ndarray.py,sha256=lFCXD6bSzmMOH7cVmvRYXakkfMCI8EoqTPNONRJ1F0s,3690
 lgdo/lh5/_serializers/read/scalar.py,sha256=kwhWm1T91pXf86CqtUUD8_qheSR92gXZrQVtssV5YCg,922
@@ -34,24 +33,24 @@ lgdo/lh5/_serializers/read/utils.py,sha256=YfSqPO-83A1XvhhuULxQ0Qz2A5ODa3sb7ApNx
 lgdo/lh5/_serializers/read/vector_of_vectors.py,sha256=765P8mElGArAaEPkHTAUXFQ47t1_3-3BQAete0LckBQ,7207
 lgdo/lh5/_serializers/write/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lgdo/lh5/_serializers/write/array.py,sha256=gAB1EjPa9YojPqohVwY_VPeuY7_bLN-lttCmfgty-qk,3175
-lgdo/lh5/_serializers/write/composite.py,sha256=qYJIqpQxc1a0hmazxYCPMv-ar9_TsyK-zWcBmPleMfM,10011
+lgdo/lh5/_serializers/write/composite.py,sha256=eX5an6YZ5I7zf1z90mfzKYYJQoS-ux10rjDrUdevW6Y,10025
 lgdo/lh5/_serializers/write/scalar.py,sha256=JPt_fcdTKOSFp5hfJdcKIfK4hxhcD8vhOlvDF-7btQ8,763
 lgdo/lh5/_serializers/write/vector_of_vectors.py,sha256=puGQX9XF5P_5DVbm_Cc6TvPrsDywgBLSYtkqFNltbB4,3493
 lgdo/types/__init__.py,sha256=DNfOErPiAZg-7Gygkp6ZKAi20Yrm1mfderZHvKo1Y4s,821
-lgdo/types/array.py,sha256=sUxh1CNCaefrnybt5qdjmmMpVQa_RqFxUv1tJ_pyBbc,6537
+lgdo/types/array.py,sha256=vxViJScqKw4zGUrrIOuuU_9Y0oTfOkEEhs0TOyUYjwI,9284
 lgdo/types/arrayofequalsizedarrays.py,sha256=DOGJiTmc1QCdm7vLbE6uIRXoMPtt8uuCfmwQawgWf5s,4949
-lgdo/types/encoded.py,sha256=JW4U5ow7KLMzhKnmhdnxbC3SZJAs4bOEDZWKG4KY1uU,15293
+lgdo/types/encoded.py,sha256=_e8u_BPfpjJbLnEdyTo9QG3kbNsGj0BN4gjdj3L1ndw,15640
 lgdo/types/fixedsizearray.py,sha256=7RjUwTz1bW0pcrdy27JlfrXPAuOU89Kj7pOuSUCojK8,1527
-lgdo/types/histogram.py,sha256=y6j2VDuGYYnLy7WI4J90ApS0PAwic4kCpouZPX09Nus,19974
-lgdo/types/lgdo.py,sha256=RQ2P70N7IWMBDnLLuJI3sm6zQTIKyOMSsKZtBNzmE90,2928
+lgdo/types/histogram.py,sha256=Jz1lLH56BfYnmcUhxUHK1h2wLDQ0Abgyd-6LznU-3-k,19979
+lgdo/types/lgdo.py,sha256=21YNtJCHnSO3M60rjsAdbMO5crDjL_0BtuFpudZ2xvU,4500
 lgdo/types/scalar.py,sha256=c5Es2vyDqyWTPV6mujzfIzMpC1jNWkEIcvYyWQUxH3Q,1933
 lgdo/types/struct.py,sha256=m3pYfGfKptV8ti3wb4n1nsPKMvhjdWCFoRdR5YooZBM,6353
-lgdo/types/table.py,sha256=VIHQOPXJHJgiCjMMb_p7EdbcCqLFSObHMdHSxC1Dm5Y,19212
-lgdo/types/vectorofvectors.py,sha256=K8w7CZou857I9YGkeOe2uYB20gbHl4OV9xhnnJPNOjc,24665
-lgdo/types/vovutils.py,sha256=7BWPP0BSj-92ifbCIUBcfqxG5-TS8uxujTyJJuDFI04,10302
-lgdo/types/waveformtable.py,sha256=f2tS4f1OEoYaTM5ldCX9zmw8iSISCT3t3wS1SrPdu_o,9901
-legend_pydataobj-1.11.13.dist-info/METADATA,sha256=f1aDC_Ev6RoXyH2PWkyHj2rCSkooR6n1bCFG84__0x4,44444
-legend_pydataobj-1.11.13.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
-legend_pydataobj-1.11.13.dist-info/entry_points.txt,sha256=0KWfnwbuwhNn0vPUqARukjp04Ca6lzfZBSirouRmk7I,76
-legend_pydataobj-1.11.13.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
-legend_pydataobj-1.11.13.dist-info/RECORD,,
+lgdo/types/table.py,sha256=hvOwhFkm-_CkNhGmD8SJoeepZcwFY6ItYOS76LztKtA,20158
+lgdo/types/vectorofvectors.py,sha256=GbAKV_ehXN4XdWSwnmKS_ErCiudRetcH_3wo7iDrVjw,26854
+lgdo/types/vovutils.py,sha256=LW3ZcwECxVYxxcFadAtY3nnK-9-rk8Xbg_m8hY30lo4,10708
+lgdo/types/waveformtable.py,sha256=9S_NMg894NZTGt2pLuskwH4-zQ5EbLnzWI6FVui6fXE,9827
+legend_pydataobj-1.12.0.dist-info/METADATA,sha256=76785CT-1QRlVf6WOFnbnRWUiC6zSUnMxFR2km15kQ4,44443
+legend_pydataobj-1.12.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+legend_pydataobj-1.12.0.dist-info/entry_points.txt,sha256=0KWfnwbuwhNn0vPUqARukjp04Ca6lzfZBSirouRmk7I,76
+legend_pydataobj-1.12.0.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
+legend_pydataobj-1.12.0.dist-info/RECORD,,
legend_pydataobj-1.12.0.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.8.0)
+Generator: setuptools (80.9.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
lgdo/__init__.py CHANGED
@@ -45,7 +45,7 @@ browsed easily in python like any `HDF5 <https://www.hdfgroup.org>`_ file using
 from __future__ import annotations
 
 from ._version import version as __version__
-from .lh5_store import LH5Iterator, LH5Store, load_dfs, load_nda, ls, show
+from .lh5 import LH5Iterator, ls, read, read_as, read_n_rows, show, write
 from .types import (
     LGDO,
     Array,
@@ -69,7 +69,6 @@ __all__ = [
     "FixedSizeArray",
     "Histogram",
     "LH5Iterator",
-    "LH5Store",
     "Scalar",
     "Struct",
     "Table",
@@ -77,8 +76,10 @@ __all__ = [
     "VectorOfVectors",
     "WaveformTable",
     "__version__",
-    "load_dfs",
-    "load_nda",
     "ls",
+    "read",
+    "read_as",
+    "read_n_rows",
     "show",
+    "write",
 ]
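
Net effect of this file: the convenience I/O entry points move from the removed lgdo.lh5_store module to lgdo.lh5 and are re-exported at the top level. A minimal migration sketch (file and group names are illustrative, not from the diff):

    import lgdo

    # 1.11: store = lgdo.LH5Store()
    #       obj, n_rows = store.read("geds/raw", "data.lh5")
    # 1.12: read() returns the LGDO directly; its length is the row count
    obj = lgdo.read("geds/raw", "data.lh5")
    n_rows = len(obj)

    # the matching top-level writer
    lgdo.write(obj, "geds/raw", "out.lh5", wo_mode="overwrite_file")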
lgdo/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '1.11.13'
-__version_tuple__ = version_tuple = (1, 11, 13)
+__version__ = version = '1.12.0'
+__version_tuple__ = version_tuple = (1, 12, 0)
lgdo/lh5/__init__.py CHANGED
@@ -10,7 +10,7 @@ from __future__ import annotations
 from .core import read, read_as, write
 from .iterator import LH5Iterator
 from .store import LH5Store
-from .tools import load_dfs, load_nda, ls, show
+from .tools import ls, show
 from .utils import read_n_rows
 
 __all__ = [
@@ -18,8 +18,6 @@ __all__ = [
     "LH5Store",
     "concat",
     "default_hdf5_settings",
-    "load_dfs",
-    "load_nda",
     "ls",
     "read",
     "read_as",
lgdo/lh5/_serializers/read/composite.py CHANGED
@@ -353,15 +353,13 @@ def _h5_read_table(
         table = Table(col_dict=col_dict, attrs=attrs)
 
         # set (write) loc to end of tree
-        table.loc = n_rows_read
+        table.resize(do_warn=True)
         return table, n_rows_read
 
     # We have read all fields into the object buffer. Run
     # checks: All columns should be the same size. So update
     # table's size as necessary, warn if any mismatches are found
     obj_buf.resize(do_warn=True)
-    # set (write) loc to end of tree
-    obj_buf.loc = obj_buf_start + n_rows_read
 
     # check attributes
     utils.check_obj_buf_attrs(obj_buf.attrs, attrs, fname, oname)
lgdo/lh5/_serializers/write/composite.py CHANGED
@@ -1,8 +1,8 @@
 from __future__ import annotations
 
 import logging
-import os
 from inspect import signature
+from pathlib import Path
 
 import h5py
@@ -53,7 +53,7 @@ def _h5_write_lgdo(
     # change any object in the file. So we use file:append for
     # write_object:overwrite.
     if not isinstance(lh5_file, h5py.File):
-        mode = "w" if wo_mode == "of" or not os.path.exists(lh5_file) else "a"
+        mode = "w" if wo_mode == "of" or not Path(lh5_file).exists() else "a"
         lh5_file = h5py.File(lh5_file, mode=mode, **file_kwargs)
 
     log.debug(
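
The substance of this file's change is the os.path → pathlib swap; the mode-selection logic is untouched. Restated as a standalone sketch (the helper name _file_mode is hypothetical):

    from pathlib import Path

    def _file_mode(lh5_file: str, wo_mode: str) -> str:
        # create ("w") only for overwrite-file mode or a missing target,
        # otherwise append ("a") -- the same decision os.path.exists() made
        return "w" if wo_mode == "of" or not Path(lh5_file).exists() else "a"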
lgdo/lh5/concat.py CHANGED
@@ -76,7 +76,7 @@ def _get_lgdos(file, obj_list):
             continue
 
         # read as little as possible
-        obj, _ = store.read(current, h5f0, n_rows=1)
+        obj = store.read(current, h5f0, n_rows=1)
         if isinstance(obj, (Table, Array, VectorOfVectors)):
             lgdos.append(current)
 
@@ -139,12 +139,6 @@ def _remove_nested_fields(lgdos: dict, obj_list: list):
             _inplace_table_filter(key, val, obj_list)
 
 
-def _slice(obj, n_rows):
-    ak_obj = obj.view_as("ak")[:n_rows]
-    obj_type = type(obj)
-    return obj_type(ak_obj)
-
-
 def lh5concat(
     lh5_files: list,
     output: str,
@@ -186,8 +180,8 @@ def lh5concat(
     # loop over lgdo objects
     for lgdo in lgdos:
         # iterate over the files
-        for lh5_obj, _, n_rows in LH5Iterator(lh5_files, lgdo):
-            data = {lgdo: _slice(lh5_obj, n_rows)}
+        for lh5_obj in LH5Iterator(lh5_files, lgdo):
+            data = {lgdo: lh5_obj}
 
             # remove the nested fields
             _remove_nested_fields(data, obj_list)
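
The removed _slice helper existed because the old iterator could hand back a buffer holding more rows than were actually read in the final chunk. Since buffers are now resized to exactly the rows read, the loop consumes lh5_obj directly. Old-versus-new pattern (file and group names illustrative):

    from lgdo.lh5 import LH5Iterator

    # 1.11: trim the buffer to n_rows by hand
    # for lh5_obj, _, n_rows in LH5Iterator(files, "geds/raw"):
    #     chunk = type(lh5_obj)(lh5_obj.view_as("ak")[:n_rows])

    # 1.12: the yielded buffer already has exactly the rows that were read
    for lh5_obj in LH5Iterator(["run0.lh5", "run1.lh5"], "geds/raw"):
        chunk = lh5_obj.view_as("ak")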
lgdo/lh5/core.py CHANGED
@@ -4,6 +4,8 @@ import bisect
 import inspect
 import sys
 from collections.abc import Mapping, Sequence
+from contextlib import suppress
+from pathlib import Path
 from typing import Any
 
 import h5py
@@ -92,8 +94,7 @@ def read(
         will be set to ``True``, while the rest will default to ``False``.
     obj_buf
         Read directly into memory provided in `obj_buf`. Note: the buffer
-        will be expanded to accommodate the data requested. To maintain the
-        buffer length, send in ``n_rows = len(obj_buf)``.
+        will be resized to accommodate the data retrieved.
     obj_buf_start
         Start location in ``obj_buf`` for read. For concatenating data to
         array-like objects.
@@ -106,25 +107,25 @@ def read(
 
     Returns
     -------
-    (object, n_rows_read)
-        `object` is the read-out object `n_rows_read` is the number of rows
-        successfully read out. Essential for arrays when the amount of data
-        is smaller than the object buffer. For scalars and structs
-        `n_rows_read` will be ``1``. For tables it is redundant with
-        ``table.loc``. If `obj_buf` is ``None``, only `object` is returned.
+    object
+        the read-out object
     """
     if isinstance(lh5_file, h5py.File):
         lh5_obj = lh5_file[name]
     elif isinstance(lh5_file, str):
         lh5_file = h5py.File(lh5_file, mode="r", locking=locking)
-        lh5_obj = lh5_file[name]
+        try:
+            lh5_obj = lh5_file[name]
+        except KeyError as ke:
+            err = f"Object {name} not found in file {lh5_file.filename}"
+            raise KeyError(err) from ke
     else:
-        lh5_files = list(lh5_file)
-
-        n_rows_read = 0
-        obj_buf_is_new = False
+        if obj_buf is not None:
+            obj_buf.resize(obj_buf_start)
+        else:
+            obj_buf_start = 0
 
-        for i, h5f in enumerate(lh5_files):
+        for i, h5f in enumerate(lh5_file):
             if (
                 isinstance(idx, (list, tuple))
                 and len(idx) > 0
@@ -146,33 +147,26 @@ def read(
                 idx = np.array(idx[0])[n_rows_to_read_i:] - n_rows_i
             else:
                 idx_i = None
-            n_rows_i = n_rows - n_rows_read
 
-            obj_ret = read(
+            obj_buf_start_i = len(obj_buf) if obj_buf else 0
+            n_rows_i = n_rows - (obj_buf_start_i - obj_buf_start)
+
+            obj_buf = read(
                 name,
                 h5f,
-                start_row,
+                start_row if i == 0 else 0,
                 n_rows_i,
                 idx_i,
                 use_h5idx,
                 field_mask,
                 obj_buf,
-                obj_buf_start,
+                obj_buf_start_i,
                 decompress,
             )
-            if isinstance(obj_ret, tuple):
-                obj_buf, n_rows_read_i = obj_ret
-                obj_buf_is_new = True
-            else:
-                obj_buf = obj_ret
-                n_rows_read_i = len(obj_buf)
 
-            n_rows_read += n_rows_read_i
-            if n_rows_read >= n_rows or obj_buf is None:
-                return obj_buf, n_rows_read
-            start_row = 0
-            obj_buf_start += n_rows_read_i
-        return obj_buf if obj_buf_is_new else (obj_buf, n_rows_read)
+            if obj_buf is None or (len(obj_buf) - obj_buf_start) >= n_rows:
+                return obj_buf
+        return obj_buf
 
     if isinstance(idx, (list, tuple)) and len(idx) > 0 and not np.isscalar(idx[0]):
         idx = idx[0]
@@ -192,8 +186,10 @@ def read(
         obj_buf_start=obj_buf_start,
         decompress=decompress,
     )
+    with suppress(AttributeError):
+        obj.resize(obj_buf_start + n_rows_read)
 
-    return obj if obj_buf is None else (obj, n_rows_read)
+    return obj
 
 
 def write(
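
This is the release's main breaking change: read() now always returns just the object, never an (object, n_rows_read) tuple, and resizes any provided buffer to the rows actually read. Callers can recover n_rows_read from the buffer growth. A sketch (names illustrative):

    from lgdo import lh5

    # 1.11: buf, n_rows = lh5.read("geds/raw", "run0.lh5")
    buf = lh5.read("geds/raw", "run0.lh5")

    # append a second file into the same buffer; the old n_rows_read is
    # simply the growth in length
    before = len(buf)
    buf = lh5.read("geds/raw", "run1.lh5", obj_buf=buf, obj_buf_start=before)
    n_rows_read = len(buf) - before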
@@ -295,7 +291,12 @@ def write(
295
291
  datasets. **Note: `compression` Ignored if compression is specified
296
292
  as an `obj` attribute.**
297
293
  """
298
- if wo_mode in ("w", "write", "of", "overwrite_file"):
294
+
295
+ if (
296
+ isinstance(lh5_file, str)
297
+ and not Path(lh5_file).is_file()
298
+ and wo_mode in ("w", "write_safe", "of", "overwrite_file")
299
+ ):
299
300
  h5py_kwargs.update(
300
301
  {
301
302
  "fs_strategy": "page",
lgdo/lh5/iterator.py CHANGED
@@ -24,7 +24,8 @@ class LH5Iterator(typing.Iterator):
 
     This can be used as an iterator:
 
-    >>> for lh5_obj, i_entry, n_rows in LH5Iterator(...):
+
+    >>> for lh5_obj in LH5Iterator(...):
     >>> # do the thing!
 
     This is intended for if you are reading a large quantity of data. This
@@ -42,6 +43,8 @@ class LH5Iterator(typing.Iterator):
     In addition to accessing requested data via ``lh5_obj``, several
     properties exist to tell you where that data came from:
 
+    - lh5_it.current_i_entry: get the index within the entry list of the
+      first entry that is currently read
     - lh5_it.current_local_entries: get the entry numbers relative to the
       file the data came from
     - lh5_it.current_global_entries: get the entry number relative to the
@@ -49,9 +52,9 @@ class LH5Iterator(typing.Iterator):
     - lh5_it.current_files: get the file name corresponding to each entry
     - lh5_it.current_groups: get the group name corresponding to each entry
 
-    This class can also be used either for random access:
+    This class can also be used for random access:
 
-    >>> lh5_obj, n_rows = lh5_it.read(i_entry)
+    >>> lh5_obj = lh5_it.read(i_entry)
 
     to read the block of entries starting at i_entry. In case of multiple files
     or the use of an event selection, i_entry refers to a global event index
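
Random access follows the same single-return convention as iteration. A sketch (paths, group, and buffer size illustrative):

    from lgdo.lh5 import LH5Iterator

    it = LH5Iterator(["run0.lh5", "run1.lh5"], "geds/raw", buffer_len=1000)

    # 1.11: lh5_obj, n_rows = it.read(5000)
    lh5_obj = it.read(5000)   # buffer filled starting at global entry 5000
    n_rows = len(lh5_obj)     # rows actually read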
@@ -65,6 +68,8 @@ class LH5Iterator(typing.Iterator):
         base_path: str = "",
         entry_list: list[int] | list[list[int]] | None = None,
         entry_mask: list[bool] | list[list[bool]] | None = None,
+        i_start: int = 0,
+        n_entries: int | None = None,
         field_mask: dict[str, bool] | list[str] | tuple[str] | None = None,
         buffer_len: int = "100*MB",
         file_cache: int = 10,
@@ -89,6 +94,10 @@ class LH5Iterator(typing.Iterator):
        entry_mask
            mask of entries to read. If a list of arrays is provided, expect
            one for each file. Ignore if a selection list is provided.
+       i_start
+           index of first entry to start at when iterating
+       n_entries
+           number of entries to read before terminating iteration
        field_mask
            mask of which fields to read. See :meth:`LH5Store.read` for
            more details.
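
The two new arguments bound the iteration window without building an explicit entry list. Intended use, sketched with illustrative names and sizes:

    from lgdo.lh5 import LH5Iterator

    # iterate over at most 10000 entries, starting at entry 2000
    it = LH5Iterator(
        "run0.lh5",
        "geds/raw",
        i_start=2000,
        n_entries=10000,
        buffer_len=1000,
    )
    for buf in it:
        ...  # at most 10 buffers of up to 1000 rows each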
@@ -183,7 +192,8 @@ class LH5Iterator(typing.Iterator):
             msg = f"can't open any files from {lh5_files}"
             raise RuntimeError(msg)
 
-        self.n_rows = 0
+        self.i_start = i_start
+        self.n_entries = n_entries
         self.current_i_entry = 0
         self.next_i_entry = 0
 
@@ -317,14 +327,21 @@ class LH5Iterator(typing.Iterator):
         )
         return self.global_entry_list
 
-    def read(self, i_entry: int) -> tuple[LGDO, int]:
-        """Read the nextlocal chunk of events, starting at i_entry. Return the
-        LH5 buffer and number of rows read."""
-        self.n_rows = 0
-        i_file = np.searchsorted(self.entry_map, i_entry, "right")
+    def read(self, i_entry: int, n_entries: int | None = None) -> LGDO:
+        "Read the nextlocal chunk of events, starting at entry."
+        self.lh5_buffer.resize(0)
+
+        if n_entries is None:
+            n_entries = self.buffer_len
+        elif n_entries == 0:
+            return self.lh5_buffer
+        elif n_entries > self.buffer_len:
+            msg = "n_entries cannot be larger than buffer_len"
+            raise ValueError(msg)
 
         # if file hasn't been opened yet, search through files
         # sequentially until we find the right one
+        i_file = np.searchsorted(self.entry_map, i_entry, "right")
         if i_file < len(self.lh5_files) and self.entry_map[i_file] == np.iinfo("q").max:
             while i_file < len(self.lh5_files) and i_entry >= self._get_file_cumentries(
                 i_file
@@ -332,10 +349,10 @@ class LH5Iterator(typing.Iterator):
                 i_file += 1
 
         if i_file == len(self.lh5_files):
-            return (self.lh5_buffer, self.n_rows)
+            return self.lh5_buffer
         local_i_entry = i_entry - self._get_file_cumentries(i_file - 1)
 
-        while self.n_rows < self.buffer_len and i_file < len(self.file_map):
+        while len(self.lh5_buffer) < n_entries and i_file < len(self.file_map):
             # Loop through files
             local_idx = self.get_file_entrylist(i_file)
             if local_idx is not None and len(local_idx) == 0:
@@ -344,18 +361,17 @@ class LH5Iterator(typing.Iterator):
                 continue
 
             i_local = local_i_entry if local_idx is None else local_idx[local_i_entry]
-            self.lh5_buffer, n_rows = self.lh5_st.read(
+            self.lh5_buffer = self.lh5_st.read(
                 self.groups[i_file],
                 self.lh5_files[i_file],
                 start_row=i_local,
-                n_rows=self.buffer_len - self.n_rows,
+                n_rows=n_entries - len(self.lh5_buffer),
                 idx=local_idx,
                 field_mask=self.field_mask,
                 obj_buf=self.lh5_buffer,
-                obj_buf_start=self.n_rows,
+                obj_buf_start=len(self.lh5_buffer),
             )
 
-            self.n_rows += n_rows
             i_file += 1
             local_i_entry = 0
 
@@ -364,7 +380,7 @@ class LH5Iterator(typing.Iterator):
         if self.friend is not None:
             self.friend.read(i_entry)
 
-        return (self.lh5_buffer, self.n_rows)
+        return self.lh5_buffer
 
     def reset_field_mask(self, mask):
         """Replaces the field mask of this iterator and any friends with mask"""
@@ -375,7 +391,7 @@ class LH5Iterator(typing.Iterator):
     @property
     def current_local_entries(self) -> NDArray[int]:
         """Return list of local file entries in buffer"""
-        cur_entries = np.zeros(self.n_rows, dtype="int32")
+        cur_entries = np.zeros(len(self.lh5_buffer), dtype="int32")
         i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
         file_start = self._get_file_cumentries(i_file - 1)
         i_local = self.current_i_entry - file_start
@@ -402,7 +418,7 @@ class LH5Iterator(typing.Iterator):
     @property
     def current_global_entries(self) -> NDArray[int]:
         """Return list of local file entries in buffer"""
-        cur_entries = np.zeros(self.n_rows, dtype="int32")
+        cur_entries = np.zeros(len(self.lh5_buffer), dtype="int32")
         i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
         file_start = self._get_file_cumentries(i_file - 1)
         i_local = self.current_i_entry - file_start
@@ -433,7 +449,7 @@ class LH5Iterator(typing.Iterator):
     @property
     def current_files(self) -> NDArray[str]:
         """Return list of file names for entries in buffer"""
-        cur_files = np.zeros(self.n_rows, dtype=object)
+        cur_files = np.zeros(len(self.lh5_buffer), dtype=object)
         i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
         file_start = self._get_file_cumentries(i_file - 1)
         i_local = self.current_i_entry - file_start
@@ -455,7 +471,7 @@ class LH5Iterator(typing.Iterator):
     @property
     def current_groups(self) -> NDArray[str]:
         """Return list of group names for entries in buffer"""
-        cur_groups = np.zeros(self.n_rows, dtype=object)
+        cur_groups = np.zeros(len(self.lh5_buffer), dtype=object)
         i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
         file_start = self._get_file_cumentries(i_file - 1)
         i_local = self.current_i_entry - file_start
@@ -485,14 +501,19 @@ class LH5Iterator(typing.Iterator):
     def __iter__(self) -> typing.Iterator:
         """Loop through entries in blocks of size buffer_len."""
         self.current_i_entry = 0
-        self.next_i_entry = 0
+        self.next_i_entry = self.i_start
         return self
 
     def __next__(self) -> tuple[LGDO, int, int]:
-        """Read next buffer_len entries and return lh5_table, iterator entry
-        and n_rows read."""
-        buf, n_rows = self.read(self.next_i_entry)
-        self.next_i_entry = self.current_i_entry + n_rows
-        if n_rows == 0:
+        """Read next buffer_len entries and return lh5_table and iterator entry."""
+        n_entries = self.n_entries
+        if n_entries is not None:
+            n_entries = min(
+                self.buffer_len, n_entries + self.i_start - self.next_i_entry
+            )
+
+        buf = self.read(self.next_i_entry, n_entries)
+        if len(buf) == 0:
             raise StopIteration
-        return (buf, self.current_i_entry, n_rows)
+        self.next_i_entry = self.current_i_entry + len(buf)
+        return buf
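
Downstream code that still wants the old (buf, i_entry, n_rows) tuple can reconstruct it from the iterator state and the buffer itself (file and group names illustrative):

    from lgdo.lh5 import LH5Iterator

    it = LH5Iterator("run0.lh5", "geds/raw")
    for buf in it:
        i_entry = it.current_i_entry  # 1.11: second tuple element
        n_rows = len(buf)             # 1.11: third tuple element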