legend-pydataobj 1.11.8__py3-none-any.whl → 1.11.10__py3-none-any.whl
This diff shows the changes between two publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- {legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/METADATA +1 -1
- {legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/RECORD +26 -25
- lgdo/__init__.py +4 -5
- lgdo/_version.py +2 -2
- lgdo/lh5/__init__.py +3 -1
- lgdo/lh5/_serializers/read/composite.py +3 -1
- lgdo/lh5/_serializers/write/composite.py +28 -11
- lgdo/lh5/concat.py +9 -3
- lgdo/lh5/core.py +31 -26
- lgdo/lh5/iterator.py +27 -48
- lgdo/lh5/store.py +75 -22
- lgdo/lh5/tools.py +111 -0
- lgdo/lh5/utils.py +4 -6
- lgdo/lh5_store.py +284 -0
- lgdo/types/array.py +15 -84
- lgdo/types/encoded.py +20 -25
- lgdo/types/histogram.py +1 -1
- lgdo/types/lgdo.py +0 -50
- lgdo/types/table.py +28 -49
- lgdo/types/vectorofvectors.py +94 -132
- lgdo/types/vovutils.py +4 -14
- lgdo/types/waveformtable.py +21 -19
- {legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/WHEEL +0 -0
- {legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/entry_points.txt +0 -0
- {legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/licenses/LICENSE +0 -0
- {legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/top_level.txt +0 -0
{legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/RECORD
CHANGED
@@ -1,8 +1,9 @@
-legend_pydataobj-1.11.
-lgdo/__init__.py,sha256=
-lgdo/_version.py,sha256=
+legend_pydataobj-1.11.10.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+lgdo/__init__.py,sha256=QMYK9HhoMi0pbahPN8mPD18gyTxscFgo7QKfCxVhy-0,3196
+lgdo/_version.py,sha256=8C1gggsLVh7AtWQynH0LD96q9_ChbquuufT1aPfdUFU,515
 lgdo/cli.py,sha256=s_EWTBWW76l7zWb6gaTSTjiT-0RzzcYEmjeFEQCVxfk,4647
 lgdo/lgdo_utils.py,sha256=6a2YWEwpyEMXlAyTHZMO01aqxy6SxJzPZkGNWKNWuS0,2567
+lgdo/lh5_store.py,sha256=5BzbJA9sLcqjp8bJDc2olwOiw0VS6rmfg3cfh1kQkRY,8512
 lgdo/logging.py,sha256=82wIOj7l7xr3WYyeHdpSXbbjzHJsy-uRyKYUYx2vMfQ,1003
 lgdo/units.py,sha256=VQYME86_ev9S7Fq8RyCOQNqYr29MphTTYemmEouZafk,161
 lgdo/utils.py,sha256=WRTmXnaQ-h2hVxwJ27qiOigdsD3DHcaDrdDjvupCuZU,3940
@@ -12,19 +13,19 @@ lgdo/compression/generic.py,sha256=tF3UhLJbUDcovLxpIzgQRxFSjZ5Fz3uDRy9kI4mFntQ,2
 lgdo/compression/radware.py,sha256=GcNTtjuyL7VBBqziUBmSqNXuhqy1bJJgvcyvyumPtrc,23839
 lgdo/compression/utils.py,sha256=W2RkBrxPpXlat84dnU9Ad7d_tTws0irtGl7O1dNWjnk,1140
 lgdo/compression/varlen.py,sha256=6ZZUItyoOfygDdE0DyoISeFZfqdbH6xl7T0eclfarzg,15127
-lgdo/lh5/__init__.py,sha256=
-lgdo/lh5/concat.py,sha256=
-lgdo/lh5/core.py,sha256=
+lgdo/lh5/__init__.py,sha256=y1XE_mpFWwamrl7WVjAVSVB25X4PrEfdVXSneSQEmlQ,825
+lgdo/lh5/concat.py,sha256=5nO7dNSb0UEP9rZiWGTKH5Cfwsm5LSm3tBJM4Kd70u0,6336
+lgdo/lh5/core.py,sha256=HT50rolOtTijgaGFskRgzoRbC0w-kxrRS2v9O5Q9Ugo,14067
 lgdo/lh5/datatype.py,sha256=O_7BqOlX8PFMyG0ppkfUT5aps5HEqX0bpuKcJO3jhu0,1691
 lgdo/lh5/exceptions.py,sha256=3kj8avXl4eBGvebl3LG12gJEmw91W0T8PYR0AfvUAyM,1211
-lgdo/lh5/iterator.py,sha256=
-lgdo/lh5/store.py,sha256=
-lgdo/lh5/tools.py,sha256=
-lgdo/lh5/utils.py,sha256=
+lgdo/lh5/iterator.py,sha256=ZaBBnmuNIjinwO0JUY55wLxX8Om9rVRRzXBC5uHmSKM,19772
+lgdo/lh5/store.py,sha256=3wAaQDd1Zmo0_bQ9DbB-FbKS4Uy_Tb642qKHXtZpSw4,10643
+lgdo/lh5/tools.py,sha256=T9CgHA8A3_tVBMtiNJ6hATQKhdqI61m3cX4p2wGKc6c,9937
+lgdo/lh5/utils.py,sha256=ioz8DlyXZsejwnU2qYdIccdHcF12H62jgLkZsiDOLSM,6243
 lgdo/lh5/_serializers/__init__.py,sha256=eZzxMp1SeZWG0PkEXUiCz3XyprQ8EmelHUmJogC8xYE,1263
 lgdo/lh5/_serializers/read/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lgdo/lh5/_serializers/read/array.py,sha256=uWfMCihfAmW2DE2ewip2qCK_kvQC_mb2zvOv26uzijc,1000
-lgdo/lh5/_serializers/read/composite.py,sha256=
+lgdo/lh5/_serializers/read/composite.py,sha256=yTm5dfTgkIL7eG9iZXxhdiRhG04cQLd_hybP4wmxCJE,11809
 lgdo/lh5/_serializers/read/encoded.py,sha256=Q98c08d8LkZq2AlY4rThYECVaEqwbv4T2Urn7TGnsyE,4130
 lgdo/lh5/_serializers/read/ndarray.py,sha256=lFCXD6bSzmMOH7cVmvRYXakkfMCI8EoqTPNONRJ1F0s,3690
 lgdo/lh5/_serializers/read/scalar.py,sha256=kwhWm1T91pXf86CqtUUD8_qheSR92gXZrQVtssV5YCg,922
@@ -32,24 +33,24 @@ lgdo/lh5/_serializers/read/utils.py,sha256=YfSqPO-83A1XvhhuULxQ0Qz2A5ODa3sb7ApNx
 lgdo/lh5/_serializers/read/vector_of_vectors.py,sha256=765P8mElGArAaEPkHTAUXFQ47t1_3-3BQAete0LckBQ,7207
 lgdo/lh5/_serializers/write/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lgdo/lh5/_serializers/write/array.py,sha256=66DKnW2yqIBlUGNBPWcE-m4W0B2-nTKusDHGX9m6GY0,3223
-lgdo/lh5/_serializers/write/composite.py,sha256=
+lgdo/lh5/_serializers/write/composite.py,sha256=eEfisBAxpF1Q8v4AbORbBQyxg0p5ugMo9cBjicOC5KI,9979
 lgdo/lh5/_serializers/write/scalar.py,sha256=JPt_fcdTKOSFp5hfJdcKIfK4hxhcD8vhOlvDF-7btQ8,763
 lgdo/lh5/_serializers/write/vector_of_vectors.py,sha256=puGQX9XF5P_5DVbm_Cc6TvPrsDywgBLSYtkqFNltbB4,3493
 lgdo/types/__init__.py,sha256=DNfOErPiAZg-7Gygkp6ZKAi20Yrm1mfderZHvKo1Y4s,821
-lgdo/types/array.py,sha256=
+lgdo/types/array.py,sha256=sUxh1CNCaefrnybt5qdjmmMpVQa_RqFxUv1tJ_pyBbc,6537
 lgdo/types/arrayofequalsizedarrays.py,sha256=DOGJiTmc1QCdm7vLbE6uIRXoMPtt8uuCfmwQawgWf5s,4949
-lgdo/types/encoded.py,sha256=
+lgdo/types/encoded.py,sha256=JW4U5ow7KLMzhKnmhdnxbC3SZJAs4bOEDZWKG4KY1uU,15293
 lgdo/types/fixedsizearray.py,sha256=7RjUwTz1bW0pcrdy27JlfrXPAuOU89Kj7pOuSUCojK8,1527
-lgdo/types/histogram.py,sha256=
-lgdo/types/lgdo.py,sha256=
+lgdo/types/histogram.py,sha256=y6j2VDuGYYnLy7WI4J90ApS0PAwic4kCpouZPX09Nus,19974
+lgdo/types/lgdo.py,sha256=RQ2P70N7IWMBDnLLuJI3sm6zQTIKyOMSsKZtBNzmE90,2928
 lgdo/types/scalar.py,sha256=c5Es2vyDqyWTPV6mujzfIzMpC1jNWkEIcvYyWQUxH3Q,1933
 lgdo/types/struct.py,sha256=Q0OWLVd4B0ciLb8t6VsxU3MPbmGLZ7WfQNno1lSQS0Q,4918
-lgdo/types/table.py,sha256=
-lgdo/types/vectorofvectors.py,sha256=
-lgdo/types/vovutils.py,sha256=
-lgdo/types/waveformtable.py,sha256=
-legend_pydataobj-1.11.
-legend_pydataobj-1.11.
-legend_pydataobj-1.11.
-legend_pydataobj-1.11.
-legend_pydataobj-1.11.
+lgdo/types/table.py,sha256=VIHQOPXJHJgiCjMMb_p7EdbcCqLFSObHMdHSxC1Dm5Y,19212
+lgdo/types/vectorofvectors.py,sha256=K8w7CZou857I9YGkeOe2uYB20gbHl4OV9xhnnJPNOjc,24665
+lgdo/types/vovutils.py,sha256=7BWPP0BSj-92ifbCIUBcfqxG5-TS8uxujTyJJuDFI04,10302
+lgdo/types/waveformtable.py,sha256=f2tS4f1OEoYaTM5ldCX9zmw8iSISCT3t3wS1SrPdu_o,9901
+legend_pydataobj-1.11.10.dist-info/METADATA,sha256=oWYdBOz-guRsPbd6918FxEhKM2C_8sQrhSqpp4AlL30,44444
+legend_pydataobj-1.11.10.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
+legend_pydataobj-1.11.10.dist-info/entry_points.txt,sha256=0KWfnwbuwhNn0vPUqARukjp04Ca6lzfZBSirouRmk7I,76
+legend_pydataobj-1.11.10.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
+legend_pydataobj-1.11.10.dist-info/RECORD,,
lgdo/__init__.py
CHANGED
@@ -45,7 +45,7 @@ browsed easily in python like any `HDF5 <https://www.hdfgroup.org>`_ file using
 from __future__ import annotations
 
 from ._version import version as __version__
-from .
+from .lh5_store import LH5Iterator, LH5Store, load_dfs, load_nda, ls, show
 from .types import (
     LGDO,
     Array,
@@ -69,6 +69,7 @@ __all__ = [
     "FixedSizeArray",
     "Histogram",
     "LH5Iterator",
+    "LH5Store",
     "Scalar",
     "Struct",
     "Table",
@@ -76,10 +77,8 @@ __all__ = [
     "VectorOfVectors",
     "WaveformTable",
     "__version__",
+    "load_dfs",
+    "load_nda",
     "ls",
-    "read",
-    "read_as",
-    "read_n_rows",
     "show",
-    "write",
 ]
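Net effect of this hunk: the legacy convenience API is importable from the top level again. A minimal usage sketch, assuming an LH5 file `data.lh5` containing a table `geds/raw` with columns `energy` and `timestamp` (all file, group, and column names here are hypothetical):

```python
import lgdo

# inspect the file layout
print(lgdo.ls("data.lh5"))  # list top-level object names
lgdo.show("data.lh5")       # print the LH5 object tree

# legacy bulk loaders restored by this version range (they may emit
# DeprecationWarnings); both take a file or list of files, a list of
# column names, and an optional group prefix
df = lgdo.load_dfs("data.lh5", ["energy", "timestamp"], lh5_group="geds/raw/")
nda = lgdo.load_nda("data.lh5", ["energy"], lh5_group="geds/raw/")
```

The same functions are also re-exported from `lgdo.lh5`, as the `lgdo/lh5/__init__.py` diff below shows.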
lgdo/_version.py
CHANGED
lgdo/lh5/__init__.py
CHANGED
@@ -11,7 +11,7 @@ from ._serializers.write.array import DEFAULT_HDF5_SETTINGS
 from .core import read, read_as, write
 from .iterator import LH5Iterator
 from .store import LH5Store
-from .tools import ls, show
+from .tools import load_dfs, load_nda, ls, show
 from .utils import read_n_rows
 
 __all__ = [
@@ -19,6 +19,8 @@ __all__ = [
     "LH5Iterator",
     "LH5Store",
     "concat",
+    "load_dfs",
+    "load_nda",
     "ls",
     "read",
     "read_as",
lgdo/lh5/_serializers/read/composite.py
CHANGED
@@ -353,13 +353,15 @@ def _h5_read_table(
         table = Table(col_dict=col_dict, attrs=attrs)
 
         # set (write) loc to end of tree
-        table.
+        table.loc = n_rows_read
         return table, n_rows_read
 
     # We have read all fields into the object buffer. Run
     # checks: All columns should be the same size. So update
     # table's size as necessary, warn if any mismatches are found
     obj_buf.resize(do_warn=True)
+    # set (write) loc to end of tree
+    obj_buf.loc = obj_buf_start + n_rows_read
 
     # check attributes
     utils.check_obj_buf_attrs(obj_buf.attrs, attrs, fname, oname)
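A short sketch of the behavior these two hunks restore: after a read, the table's write cursor (`loc`) points just past the rows that were read, both for a freshly allocated table and for a caller-provided buffer. This assumes the tuple-returning `LH5Store.read` seen elsewhere in this diff; file and group names are hypothetical:

```python
from lgdo.lh5 import LH5Store

store = LH5Store()

# fresh read: a new Table is allocated, loc ends up at n_rows_read
tbl, n_rows = store.read("geds/raw", "data.lh5", n_rows=100)
assert tbl.loc == n_rows

# read into the existing buffer at an offset: loc ends up at
# obj_buf_start + n_rows_read
tbl, n2 = store.read("geds/raw", "data.lh5", obj_buf=tbl, obj_buf_start=50)
assert tbl.loc == 50 + n2
```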
lgdo/lh5/_serializers/write/composite.py
CHANGED
@@ -1,8 +1,8 @@
 from __future__ import annotations
 
 import logging
+import os
 from inspect import signature
-from pathlib import Path
 
 import h5py
 
@@ -53,7 +53,7 @@ def _h5_write_lgdo(
     # change any object in the file. So we use file:append for
     # write_object:overwrite.
     if not isinstance(lh5_file, h5py.File):
-        mode = "w" if wo_mode == "of" or not
+        mode = "w" if wo_mode == "of" or not os.path.exists(lh5_file) else "a"
         lh5_file = h5py.File(lh5_file, mode=mode, **file_kwargs)
 
     log.debug(
@@ -191,15 +191,26 @@ def _h5_write_struct(
 
     # In order to append a column, we need to update the
     # `struct/table{old_fields}` value in `group.attrs['datatype']` to include
-    # the new fields.
-    #
-    #
+    # the new fields. One way to do this is to override `obj.attrs["datatype"]`
+    # to include old and new fields. Then we can write the fields to the
+    # struct/table as normal.
     if wo_mode == "ac":
+        if name not in group:
+            msg = "Cannot append column to non-existing struct on disk"
+            raise LH5EncodeError(msg, lh5_file, group, name)
+
         old_group = utils.get_h5_group(name, group)
+        if "datatype" not in old_group.attrs:
+            msg = "Cannot append column to an existing non-LGDO object on disk"
+            raise LH5EncodeError(msg, lh5_file, group, name)
+
         lgdotype = datatype.datatype(old_group.attrs["datatype"])
         fields = datatype.get_struct_fields(old_group.attrs["datatype"])
         if lgdotype is not type(obj):
-            msg =
+            msg = (
+                "Trying to append columns to an object of different "
+                f"type {lgdotype.__name__}!={type(obj)}"
+            )
             raise LH5EncodeError(msg, lh5_file, group, name)
 
     # If the mode is `append_column`, make sure we aren't appending
@@ -230,14 +241,22 @@ def _h5_write_struct(
 
         obj.attrs["datatype"] = obj.datatype_name() + "{" + ",".join(fields) + "}"
 
+        # propagating wo_mode="ac" to nested LGDOs does not make any sense
+        wo_mode = "append"
+
+        # overwrite attributes of the existing struct
+        attrs_overwrite = True
+    else:
+        attrs_overwrite = wo_mode == "o"
+
     group = utils.get_h5_group(
         name,
         group,
         grp_attrs=obj.attrs,
-        overwrite=
+        overwrite=attrs_overwrite,
    )
     # If the mode is overwrite, then we need to peek into the file's
-    # table's existing fields.
+    # table's existing fields. If we are writing a new table to the
     # group that does not contain an old field, we should delete that
     # old field from the file
     if wo_mode == "o":
@@ -266,11 +285,9 @@ def _h5_write_struct(
         else:
             obj_fld = obj[field]
 
-        # Convert keys to string for dataset names
-        f = str(field)
         _h5_write_lgdo(
             obj_fld,
-            f,
+            str(field),
             lh5_file,
             group=group,
             start_row=start_row,
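A hedged sketch of the `wo_mode="ac"` (append-column) path guarded above: the target object must already exist on disk and be an LGDO of the same type, otherwise an `LH5EncodeError` is raised early. File and column names are made up:

```python
import numpy as np
from lgdo import Array, Table
from lgdo.lh5 import write

# write a table, then append a new column to it in a second pass
write(Table(col_dict={"a": Array(np.arange(10))}), "tbl", "scratch.lh5", wo_mode="of")
write(Table(col_dict={"b": Array(np.ones(10))}), "tbl", "scratch.lh5", wo_mode="ac")

# appending to a name that does not exist on disk, or to a plain HDF5
# group without a "datatype" attribute, now fails with LH5EncodeError
# instead of an obscure h5py error
```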
lgdo/lh5/concat.py
CHANGED
@@ -76,7 +76,7 @@ def _get_lgdos(file, obj_list):
             continue
 
         # read as little as possible
-        obj = store.read(current, h5f0, n_rows=1)
+        obj, _ = store.read(current, h5f0, n_rows=1)
         if isinstance(obj, (Table, Array, VectorOfVectors)):
             lgdos.append(current)
 
@@ -139,6 +139,12 @@ def _remove_nested_fields(lgdos: dict, obj_list: list):
             _inplace_table_filter(key, val, obj_list)
 
 
+def _slice(obj, n_rows):
+    ak_obj = obj.view_as("ak")[:n_rows]
+    obj_type = type(obj)
+    return obj_type(ak_obj)
+
+
 def lh5concat(
     lh5_files: list,
     output: str,
@@ -180,8 +186,8 @@ def lh5concat(
     # loop over lgdo objects
     for lgdo in lgdos:
         # iterate over the files
-        for lh5_obj in LH5Iterator(lh5_files, lgdo):
-            data = {lgdo: lh5_obj}
+        for lh5_obj, _, n_rows in LH5Iterator(lh5_files, lgdo):
+            data = {lgdo: _slice(lh5_obj, n_rows)}
 
             # remove the nested fields
             _remove_nested_fields(data, obj_list)
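The new `_slice` helper is needed because the iterator's buffer is reused between chunks: on the last chunk it can hold stale rows past `n_rows`, which must not be written to the output file. A standalone sketch of the same trimming logic (not the concat code path itself):

```python
from lgdo import VectorOfVectors

vov = VectorOfVectors([[1, 2], [3], [4, 5, 6]])

# keep only the first n_rows entries via an awkward view, as _slice does
n_rows = 2
trimmed = type(vov)(vov.view_as("ak")[:n_rows])
assert len(trimmed) == n_rows
```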
lgdo/lh5/core.py
CHANGED
@@ -4,7 +4,6 @@ import bisect
 import inspect
 import sys
 from collections.abc import Mapping, Sequence
-from contextlib import suppress
 from typing import Any
 
 import h5py
@@ -93,7 +92,8 @@ def read(
         will be set to ``True``, while the rest will default to ``False``.
     obj_buf
         Read directly into memory provided in `obj_buf`. Note: the buffer
-        will be
+        will be expanded to accommodate the data requested. To maintain the
+        buffer length, send in ``n_rows = len(obj_buf)``.
     obj_buf_start
         Start location in ``obj_buf`` for read. For concatenating data to
         array-like objects.
@@ -106,25 +106,25 @@ def read(
 
     Returns
     -------
-    object
-        the read-out object
+    (object, n_rows_read)
+        `object` is the read-out object, `n_rows_read` is the number of rows
+        successfully read out. Essential for arrays when the amount of data
+        is smaller than the object buffer. For scalars and structs
+        `n_rows_read` will be ``1``. For tables it is redundant with
+        ``table.loc``. If `obj_buf` is ``None``, only `object` is returned.
     """
     if isinstance(lh5_file, h5py.File):
         lh5_obj = lh5_file[name]
     elif isinstance(lh5_file, str):
         lh5_file = h5py.File(lh5_file, mode="r", locking=locking)
-        try:
-            lh5_obj = lh5_file[name]
-        except KeyError as ke:
-            err = f"Object {name} not found in file {lh5_file.filename}"
-            raise KeyError(err) from ke
+        lh5_obj = lh5_file[name]
     else:
-
-
-
-
+        lh5_files = list(lh5_file)
+
+        n_rows_read = 0
+        obj_buf_is_new = False
 
-        for i, h5f in enumerate(
+        for i, h5f in enumerate(lh5_files):
             if (
                 isinstance(idx, (list, tuple))
                 and len(idx) > 0
@@ -146,26 +146,33 @@ def read(
                 idx = np.array(idx[0])[n_rows_to_read_i:] - n_rows_i
             else:
                 idx_i = None
+                n_rows_i = n_rows - n_rows_read
 
-
-            n_rows_i = n_rows - (obj_buf_start_i - obj_buf_start)
-
-            obj_buf = read(
+            obj_ret = read(
                 name,
                 h5f,
-                start_row
+                start_row,
                 n_rows_i,
                 idx_i,
                 use_h5idx,
                 field_mask,
                 obj_buf,
-
+                obj_buf_start,
                 decompress,
             )
+            if isinstance(obj_ret, tuple):
+                obj_buf, n_rows_read_i = obj_ret
+                obj_buf_is_new = True
+            else:
+                obj_buf = obj_ret
+                n_rows_read_i = len(obj_buf)
 
-
-
-
+            n_rows_read += n_rows_read_i
+            if n_rows_read >= n_rows or obj_buf is None:
+                return obj_buf, n_rows_read
+            start_row = 0
+            obj_buf_start += n_rows_read_i
+        return obj_buf if obj_buf_is_new else (obj_buf, n_rows_read)
 
     if isinstance(idx, (list, tuple)) and len(idx) > 0 and not np.isscalar(idx[0]):
         idx = idx[0]
@@ -185,10 +192,8 @@ def read(
         obj_buf_start=obj_buf_start,
         decompress=decompress,
     )
-    with suppress(AttributeError):
-        obj.resize(obj_buf_start + n_rows_read)
 
-    return obj
+    return obj if obj_buf is None else (obj, n_rows_read)
 
 
 def write(
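In short, this restores the older return convention of `lgdo.lh5.read`: a bare LGDO when no buffer is passed, an `(obj, n_rows_read)` pair when one is. A sketch with hypothetical file and object names:

```python
from lgdo import lh5

# no obj_buf: just the object is returned
obj = lh5.read("geds/raw", "data.lh5", n_rows=1000)

# with obj_buf: (buffer, rows actually read); n_rows_read can be smaller
# than requested near the end of the file
obj, n_rows_read = lh5.read("geds/raw", "data.lh5", obj_buf=obj, start_row=1000)
```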
lgdo/lh5/iterator.py
CHANGED
@@ -24,8 +24,7 @@ class LH5Iterator(typing.Iterator):
 
     This can be used as an iterator:
 
-
-    >>> for lh5_obj in LH5Iterator(...):
+    >>> for lh5_obj, i_entry, n_rows in LH5Iterator(...):
     >>>     # do the thing!
 
     This is intended for if you are reading a large quantity of data. This
@@ -43,8 +42,6 @@ class LH5Iterator(typing.Iterator):
     In addition to accessing requested data via ``lh5_obj``, several
     properties exist to tell you where that data came from:
 
-    - lh5_it.current_i_entry: get the index within the entry list of the
-      first entry that is currently read
     - lh5_it.current_local_entries: get the entry numbers relative to the
       file the data came from
     - lh5_it.current_global_entries: get the entry number relative to the
@@ -52,9 +49,9 @@ class LH5Iterator(typing.Iterator):
     - lh5_it.current_files: get the file name corresponding to each entry
     - lh5_it.current_groups: get the group name corresponding to each entry
 
-    This class can also be used for random access:
+    This class can also be used either for random access:
 
-    >>> lh5_obj = lh5_it.read(i_entry)
+    >>> lh5_obj, n_rows = lh5_it.read(i_entry)
 
     to read the block of entries starting at i_entry. In case of multiple files
     or the use of an event selection, i_entry refers to a global event index
@@ -68,8 +65,6 @@ class LH5Iterator(typing.Iterator):
         base_path: str = "",
         entry_list: list[int] | list[list[int]] | None = None,
         entry_mask: list[bool] | list[list[bool]] | None = None,
-        i_start: int = 0,
-        n_entries: int | None = None,
         field_mask: dict[str, bool] | list[str] | tuple[str] | None = None,
         buffer_len: int = "100*MB",
         file_cache: int = 10,
@@ -94,10 +89,6 @@ class LH5Iterator(typing.Iterator):
         entry_mask
             mask of entries to read. If a list of arrays is provided, expect
             one for each file. Ignore if a selection list is provided.
-        i_start
-            index of first entry to start at when iterating
-        n_entries
-            number of entries to read before terminating iteration
         field_mask
             mask of which fields to read. See :meth:`LH5Store.read` for
             more details.
@@ -192,8 +183,7 @@ class LH5Iterator(typing.Iterator):
             msg = f"can't open any files from {lh5_files}"
             raise RuntimeError(msg)
 
-        self.
-        self.n_entries = n_entries
+        self.n_rows = 0
         self.current_i_entry = 0
         self.next_i_entry = 0
 
@@ -327,21 +317,14 @@ class LH5Iterator(typing.Iterator):
         )
         return self.global_entry_list
 
-    def read(self, i_entry: int
-        "Read the nextlocal chunk of events, starting at
-
-
-        if n_entries is None:
-            n_entries = self.buffer_len
-        elif n_entries == 0:
-            return self.lh5_buffer
-        elif n_entries > self.buffer_len:
-            msg = "n_entries cannot be larger than buffer_len"
-            raise ValueError(msg)
+    def read(self, i_entry: int) -> tuple[LGDO, int]:
+        """Read the next local chunk of events, starting at i_entry. Return the
+        LH5 buffer and number of rows read."""
+        self.n_rows = 0
+        i_file = np.searchsorted(self.entry_map, i_entry, "right")
 
         # if file hasn't been opened yet, search through files
         # sequentially until we find the right one
-        i_file = np.searchsorted(self.entry_map, i_entry, "right")
         if i_file < len(self.lh5_files) and self.entry_map[i_file] == np.iinfo("q").max:
             while i_file < len(self.lh5_files) and i_entry >= self._get_file_cumentries(
                 i_file
@@ -349,10 +332,10 @@ class LH5Iterator(typing.Iterator):
                 i_file += 1
 
         if i_file == len(self.lh5_files):
-            return self.lh5_buffer
+            return (self.lh5_buffer, self.n_rows)
         local_i_entry = i_entry - self._get_file_cumentries(i_file - 1)
 
-        while
+        while self.n_rows < self.buffer_len and i_file < len(self.file_map):
             # Loop through files
             local_idx = self.get_file_entrylist(i_file)
             if local_idx is not None and len(local_idx) == 0:
@@ -361,17 +344,18 @@ class LH5Iterator(typing.Iterator):
                 continue
 
             i_local = local_i_entry if local_idx is None else local_idx[local_i_entry]
-            self.lh5_buffer = self.lh5_st.read(
+            self.lh5_buffer, n_rows = self.lh5_st.read(
                 self.groups[i_file],
                 self.lh5_files[i_file],
                 start_row=i_local,
-                n_rows=
+                n_rows=self.buffer_len - self.n_rows,
                 idx=local_idx,
                 field_mask=self.field_mask,
                 obj_buf=self.lh5_buffer,
-                obj_buf_start=
+                obj_buf_start=self.n_rows,
             )
 
+            self.n_rows += n_rows
             i_file += 1
             local_i_entry = 0
 
@@ -380,7 +364,7 @@ class LH5Iterator(typing.Iterator):
         if self.friend is not None:
             self.friend.read(i_entry)
 
-        return self.lh5_buffer
+        return (self.lh5_buffer, self.n_rows)
 
     def reset_field_mask(self, mask):
         """Replaces the field mask of this iterator and any friends with mask"""
@@ -391,7 +375,7 @@ class LH5Iterator(typing.Iterator):
     @property
     def current_local_entries(self) -> NDArray[int]:
         """Return list of local file entries in buffer"""
-        cur_entries = np.zeros(
+        cur_entries = np.zeros(self.n_rows, dtype="int32")
         i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
         file_start = self._get_file_cumentries(i_file - 1)
         i_local = self.current_i_entry - file_start
@@ -418,7 +402,7 @@ class LH5Iterator(typing.Iterator):
     @property
     def current_global_entries(self) -> NDArray[int]:
         """Return list of local file entries in buffer"""
-        cur_entries = np.zeros(
+        cur_entries = np.zeros(self.n_rows, dtype="int32")
         i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
         file_start = self._get_file_cumentries(i_file - 1)
         i_local = self.current_i_entry - file_start
@@ -449,7 +433,7 @@ class LH5Iterator(typing.Iterator):
     @property
     def current_files(self) -> NDArray[str]:
         """Return list of file names for entries in buffer"""
-        cur_files = np.zeros(
+        cur_files = np.zeros(self.n_rows, dtype=object)
         i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
         file_start = self._get_file_cumentries(i_file - 1)
         i_local = self.current_i_entry - file_start
@@ -471,7 +455,7 @@ class LH5Iterator(typing.Iterator):
     @property
     def current_groups(self) -> NDArray[str]:
         """Return list of group names for entries in buffer"""
-        cur_groups = np.zeros(
+        cur_groups = np.zeros(self.n_rows, dtype=object)
         i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
         file_start = self._get_file_cumentries(i_file - 1)
         i_local = self.current_i_entry - file_start
@@ -501,19 +485,14 @@ class LH5Iterator(typing.Iterator):
     def __iter__(self) -> typing.Iterator:
         """Loop through entries in blocks of size buffer_len."""
         self.current_i_entry = 0
-        self.next_i_entry =
+        self.next_i_entry = 0
         return self
 
     def __next__(self) -> tuple[LGDO, int, int]:
-        """Read next buffer_len entries and return lh5_table
-
-
-
-
-        )
-
-        buf = self.read(self.next_i_entry, n_entries)
-        if len(buf) == 0:
+        """Read next buffer_len entries and return lh5_table, iterator entry
+        and n_rows read."""
+        buf, n_rows = self.read(self.next_i_entry)
+        self.next_i_entry = self.current_i_entry + n_rows
+        if n_rows == 0:
             raise StopIteration
-
-        return buf
+        return (buf, self.current_i_entry, n_rows)