legend-pydataobj 1.11.7__py3-none-any.whl → 1.11.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {legend_pydataobj-1.11.7.dist-info → legend_pydataobj-1.11.8.dist-info}/METADATA +1 -1
- {legend_pydataobj-1.11.7.dist-info → legend_pydataobj-1.11.8.dist-info}/RECORD +25 -26
- {legend_pydataobj-1.11.7.dist-info → legend_pydataobj-1.11.8.dist-info}/WHEEL +1 -1
- lgdo/__init__.py +5 -4
- lgdo/_version.py +2 -2
- lgdo/lh5/__init__.py +1 -3
- lgdo/lh5/_serializers/read/composite.py +1 -3
- lgdo/lh5/_serializers/write/composite.py +14 -8
- lgdo/lh5/concat.py +3 -9
- lgdo/lh5/core.py +33 -36
- lgdo/lh5/iterator.py +48 -27
- lgdo/lh5/store.py +22 -75
- lgdo/lh5/tools.py +0 -111
- lgdo/lh5/utils.py +6 -4
- lgdo/types/array.py +84 -15
- lgdo/types/encoded.py +25 -20
- lgdo/types/histogram.py +1 -1
- lgdo/types/lgdo.py +50 -0
- lgdo/types/table.py +49 -28
- lgdo/types/vectorofvectors.py +132 -94
- lgdo/types/vovutils.py +14 -4
- lgdo/types/waveformtable.py +19 -21
- lgdo/lh5_store.py +0 -284
- {legend_pydataobj-1.11.7.dist-info → legend_pydataobj-1.11.8.dist-info}/entry_points.txt +0 -0
- {legend_pydataobj-1.11.7.dist-info → legend_pydataobj-1.11.8.dist-info}/licenses/LICENSE +0 -0
- {legend_pydataobj-1.11.7.dist-info → legend_pydataobj-1.11.8.dist-info}/top_level.txt +0 -0
{legend_pydataobj-1.11.7.dist-info → legend_pydataobj-1.11.8.dist-info}/RECORD
CHANGED
@@ -1,9 +1,8 @@
-legend_pydataobj-1.11.
-lgdo/__init__.py,sha256=
-lgdo/_version.py,sha256=
+legend_pydataobj-1.11.8.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+lgdo/__init__.py,sha256=fkRv79kdtBasw31gPVK9SdLQ2vEEajTV2t3UPDvFg9o,3206
+lgdo/_version.py,sha256=L3yqa8Aseny4agSopGdx4D8Sk3htDn12nBlc9W8wWes,513
 lgdo/cli.py,sha256=s_EWTBWW76l7zWb6gaTSTjiT-0RzzcYEmjeFEQCVxfk,4647
 lgdo/lgdo_utils.py,sha256=6a2YWEwpyEMXlAyTHZMO01aqxy6SxJzPZkGNWKNWuS0,2567
-lgdo/lh5_store.py,sha256=5BzbJA9sLcqjp8bJDc2olwOiw0VS6rmfg3cfh1kQkRY,8512
 lgdo/logging.py,sha256=82wIOj7l7xr3WYyeHdpSXbbjzHJsy-uRyKYUYx2vMfQ,1003
 lgdo/units.py,sha256=VQYME86_ev9S7Fq8RyCOQNqYr29MphTTYemmEouZafk,161
 lgdo/utils.py,sha256=WRTmXnaQ-h2hVxwJ27qiOigdsD3DHcaDrdDjvupCuZU,3940
@@ -13,19 +12,19 @@ lgdo/compression/generic.py,sha256=tF3UhLJbUDcovLxpIzgQRxFSjZ5Fz3uDRy9kI4mFntQ,2
 lgdo/compression/radware.py,sha256=GcNTtjuyL7VBBqziUBmSqNXuhqy1bJJgvcyvyumPtrc,23839
 lgdo/compression/utils.py,sha256=W2RkBrxPpXlat84dnU9Ad7d_tTws0irtGl7O1dNWjnk,1140
 lgdo/compression/varlen.py,sha256=6ZZUItyoOfygDdE0DyoISeFZfqdbH6xl7T0eclfarzg,15127
-lgdo/lh5/__init__.py,sha256=
-lgdo/lh5/concat.py,sha256=
-lgdo/lh5/core.py,sha256=
+lgdo/lh5/__init__.py,sha256=UTzKGmpgFoHwVB_yNULvJsHD_uQQGl-R87l-3QBkh7w,773
+lgdo/lh5/concat.py,sha256=BZCgK7TWPKK8fMmha8K83d3bC31FVO1b5LOW7x-Ru1s,6186
+lgdo/lh5/core.py,sha256=3o6JsX6aNkMa3plX96a4vG7LWmfco33OuUzV_mMFStQ,13626
 lgdo/lh5/datatype.py,sha256=O_7BqOlX8PFMyG0ppkfUT5aps5HEqX0bpuKcJO3jhu0,1691
 lgdo/lh5/exceptions.py,sha256=3kj8avXl4eBGvebl3LG12gJEmw91W0T8PYR0AfvUAyM,1211
-lgdo/lh5/iterator.py,sha256=
-lgdo/lh5/store.py,sha256=
-lgdo/lh5/tools.py,sha256=
-lgdo/lh5/utils.py,sha256=
+lgdo/lh5/iterator.py,sha256=1ob9B7Bf3ioGCtZkUZoL6ibTxAwLf4ld8_33ghVVEa4,20498
+lgdo/lh5/store.py,sha256=qkBm3gPbr1R2UlQpUuDR5sGRMzpYJBWFL8fDIry6tmQ,8474
+lgdo/lh5/tools.py,sha256=drtJWHY82wCFuFr6LVVnm2AQgs_wZuFmAvyOB4tcOHs,6431
+lgdo/lh5/utils.py,sha256=f2H7H1D-RfDN3g_YrVDQEPaHevn5yDJFA-uznK9cgx8,6336
 lgdo/lh5/_serializers/__init__.py,sha256=eZzxMp1SeZWG0PkEXUiCz3XyprQ8EmelHUmJogC8xYE,1263
 lgdo/lh5/_serializers/read/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lgdo/lh5/_serializers/read/array.py,sha256=uWfMCihfAmW2DE2ewip2qCK_kvQC_mb2zvOv26uzijc,1000
-lgdo/lh5/_serializers/read/composite.py,sha256=
+lgdo/lh5/_serializers/read/composite.py,sha256=UvkZHEhf0V7SFLxzF52eyP68hU0guGOLqosrfmIfeys,11729
 lgdo/lh5/_serializers/read/encoded.py,sha256=Q98c08d8LkZq2AlY4rThYECVaEqwbv4T2Urn7TGnsyE,4130
 lgdo/lh5/_serializers/read/ndarray.py,sha256=lFCXD6bSzmMOH7cVmvRYXakkfMCI8EoqTPNONRJ1F0s,3690
 lgdo/lh5/_serializers/read/scalar.py,sha256=kwhWm1T91pXf86CqtUUD8_qheSR92gXZrQVtssV5YCg,922
@@ -33,24 +32,24 @@ lgdo/lh5/_serializers/read/utils.py,sha256=YfSqPO-83A1XvhhuULxQ0Qz2A5ODa3sb7ApNx
 lgdo/lh5/_serializers/read/vector_of_vectors.py,sha256=765P8mElGArAaEPkHTAUXFQ47t1_3-3BQAete0LckBQ,7207
 lgdo/lh5/_serializers/write/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lgdo/lh5/_serializers/write/array.py,sha256=66DKnW2yqIBlUGNBPWcE-m4W0B2-nTKusDHGX9m6GY0,3223
-lgdo/lh5/_serializers/write/composite.py,sha256=
+lgdo/lh5/_serializers/write/composite.py,sha256=wiq9O3Cb08wrAm14L7Jz69ppL7SnYEDHgW6pJtY8aBI,9425
 lgdo/lh5/_serializers/write/scalar.py,sha256=JPt_fcdTKOSFp5hfJdcKIfK4hxhcD8vhOlvDF-7btQ8,763
 lgdo/lh5/_serializers/write/vector_of_vectors.py,sha256=puGQX9XF5P_5DVbm_Cc6TvPrsDywgBLSYtkqFNltbB4,3493
 lgdo/types/__init__.py,sha256=DNfOErPiAZg-7Gygkp6ZKAi20Yrm1mfderZHvKo1Y4s,821
-lgdo/types/array.py,sha256=
+lgdo/types/array.py,sha256=vxViJScqKw4zGUrrIOuuU_9Y0oTfOkEEhs0TOyUYjwI,9284
 lgdo/types/arrayofequalsizedarrays.py,sha256=DOGJiTmc1QCdm7vLbE6uIRXoMPtt8uuCfmwQawgWf5s,4949
-lgdo/types/encoded.py,sha256=
+lgdo/types/encoded.py,sha256=_e8u_BPfpjJbLnEdyTo9QG3kbNsGj0BN4gjdj3L1ndw,15640
 lgdo/types/fixedsizearray.py,sha256=7RjUwTz1bW0pcrdy27JlfrXPAuOU89Kj7pOuSUCojK8,1527
-lgdo/types/histogram.py,sha256=
-lgdo/types/lgdo.py,sha256=
+lgdo/types/histogram.py,sha256=Jz1lLH56BfYnmcUhxUHK1h2wLDQ0Abgyd-6LznU-3-k,19979
+lgdo/types/lgdo.py,sha256=21YNtJCHnSO3M60rjsAdbMO5crDjL_0BtuFpudZ2xvU,4500
 lgdo/types/scalar.py,sha256=c5Es2vyDqyWTPV6mujzfIzMpC1jNWkEIcvYyWQUxH3Q,1933
 lgdo/types/struct.py,sha256=Q0OWLVd4B0ciLb8t6VsxU3MPbmGLZ7WfQNno1lSQS0Q,4918
-lgdo/types/table.py,sha256=
-lgdo/types/vectorofvectors.py,sha256=
-lgdo/types/vovutils.py,sha256=
-lgdo/types/waveformtable.py,sha256=
-legend_pydataobj-1.11.
-legend_pydataobj-1.11.
-legend_pydataobj-1.11.
-legend_pydataobj-1.11.
-legend_pydataobj-1.11.
+lgdo/types/table.py,sha256=FkWesoEA9bmGGSW8Ewig1Zs77ffUoR_nggfYSmkWpjU,20079
+lgdo/types/vectorofvectors.py,sha256=GbAKV_ehXN4XdWSwnmKS_ErCiudRetcH_3wo7iDrVjw,26854
+lgdo/types/vovutils.py,sha256=LW3ZcwECxVYxxcFadAtY3nnK-9-rk8Xbg_m8hY30lo4,10708
+lgdo/types/waveformtable.py,sha256=9S_NMg894NZTGt2pLuskwH4-zQ5EbLnzWI6FVui6fXE,9827
+legend_pydataobj-1.11.8.dist-info/METADATA,sha256=qFXYWIsv8umqbHxK7ltDFwbPU_z5STG6E5CUAxZk6EY,44443
+legend_pydataobj-1.11.8.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
+legend_pydataobj-1.11.8.dist-info/entry_points.txt,sha256=0KWfnwbuwhNn0vPUqARukjp04Ca6lzfZBSirouRmk7I,76
+legend_pydataobj-1.11.8.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
+legend_pydataobj-1.11.8.dist-info/RECORD,,
lgdo/__init__.py
CHANGED
@@ -45,7 +45,7 @@ browsed easily in python like any `HDF5 <https://www.hdfgroup.org>`_ file using
 from __future__ import annotations
 
 from ._version import version as __version__
-from .
+from .lh5 import LH5Iterator, ls, read, read_as, read_n_rows, show, write
 from .types import (
     LGDO,
     Array,
@@ -69,7 +69,6 @@ __all__ = [
     "FixedSizeArray",
     "Histogram",
     "LH5Iterator",
-    "LH5Store",
     "Scalar",
     "Struct",
     "Table",
@@ -77,8 +76,10 @@ __all__ = [
     "VectorOfVectors",
     "WaveformTable",
     "__version__",
-    "load_dfs",
-    "load_nda",
     "ls",
+    "read",
+    "read_as",
+    "read_n_rows",
     "show",
+    "write",
 ]
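The net effect of these `lgdo/__init__.py` hunks is that the I/O helpers are re-exported at the package root, while `LH5Store`, `load_dfs` and `load_nda` are no longer exported from it. A minimal sketch of the resulting top-level API; the file and group names are hypothetical:

    import numpy as np
    import lgdo

    # build a small Table and round-trip it through an LH5 file
    tbl = lgdo.Table(col_dict={"x": lgdo.Array(np.arange(3))})
    lgdo.write(tbl, "data", "test.lh5", wo_mode="overwrite_file")
    obj = lgdo.read("data", "test.lh5")  # returns the LGDO directly
    lgdo.show("test.lh5")                # print the LH5 object tree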
lgdo/_version.py
CHANGED
lgdo/lh5/__init__.py
CHANGED
@@ -11,7 +11,7 @@ from ._serializers.write.array import DEFAULT_HDF5_SETTINGS
 from .core import read, read_as, write
 from .iterator import LH5Iterator
 from .store import LH5Store
-from .tools import
+from .tools import ls, show
 from .utils import read_n_rows
 
 __all__ = [
@@ -19,8 +19,6 @@ __all__ = [
     "LH5Iterator",
     "LH5Store",
     "concat",
-    "load_dfs",
-    "load_nda",
     "ls",
     "read",
     "read_as",
lgdo/lh5/_serializers/read/composite.py
CHANGED
@@ -353,15 +353,13 @@ def _h5_read_table(
         table = Table(col_dict=col_dict, attrs=attrs)
 
         # set (write) loc to end of tree
-        table.
+        table.resize(do_warn=True)
         return table, n_rows_read
 
     # We have read all fields into the object buffer. Run
     # checks: All columns should be the same size. So update
     # table's size as necessary, warn if any mismatches are found
     obj_buf.resize(do_warn=True)
-    # set (write) loc to end of tree
-    obj_buf.loc = obj_buf_start + n_rows_read
 
     # check attributes
     utils.check_obj_buf_attrs(obj_buf.attrs, attrs, fname, oname)
lgdo/lh5/_serializers/write/composite.py
CHANGED
@@ -1,8 +1,8 @@
 from __future__ import annotations
 
 import logging
-import os
 from inspect import signature
+from pathlib import Path
 
 import h5py
 
@@ -53,7 +53,7 @@ def _h5_write_lgdo(
     # change any object in the file. So we use file:append for
     # write_object:overwrite.
     if not isinstance(lh5_file, h5py.File):
-        mode = "w" if wo_mode == "of" or not
+        mode = "w" if wo_mode == "of" or not Path(lh5_file).exists() else "a"
         lh5_file = h5py.File(lh5_file, mode=mode, **file_kwargs)
 
     log.debug(
@@ -186,19 +186,20 @@ def _h5_write_struct(
     write_start=0,
     **h5py_kwargs,
 ):
+    # this works for structs and derived (tables)
     assert isinstance(obj, types.Struct)
 
     # In order to append a column, we need to update the
-    # `table{old_fields}` value in `group.attrs['datatype"]` to include
+    # `struct/table{old_fields}` value in `group.attrs['datatype"]` to include
     # the new fields. One way to do this is to override
     # `obj.attrs["datatype"]` to include old and new fields. Then we
-    # can write the fields to the table as normal.
+    # can write the fields to the struct/table as normal.
     if wo_mode == "ac":
         old_group = utils.get_h5_group(name, group)
         lgdotype = datatype.datatype(old_group.attrs["datatype"])
         fields = datatype.get_struct_fields(old_group.attrs["datatype"])
-        if not
-            msg = f"Trying to append columns to an object of type {lgdotype.__name__}"
+        if lgdotype is not type(obj):
+            msg = f"Trying to append columns to an object of different type {lgdotype.__name__}!={type(obj)}"
             raise LH5EncodeError(msg, lh5_file, group, name)
 
     # If the mode is `append_column`, make sure we aren't appending
@@ -211,8 +212,12 @@ def _h5_write_struct(
                 "column(s) to a table with the same field(s)"
             )
             raise LH5EncodeError(msg, lh5_file, group, name)
+
         # It doesn't matter what key we access, as all fields in the old table have the same size
-        if
+        if (
+            isinstance(obj, types.Table)
+            and old_group[next(iter(old_group.keys()))].size != obj.size
+        ):
             msg = (
                 f"Table sizes don't match. Trying to append column of size {obj.size} "
                 f"to a table of size {old_group[next(iter(old_group.keys()))].size}."
@@ -222,7 +227,8 @@ def _h5_write_struct(
         # Now we can append the obj.keys() to the old fields, and then update obj.attrs.
         fields.extend(list(obj.keys()))
         obj.attrs.pop("datatype")
-
+
+        obj.attrs["datatype"] = obj.datatype_name() + "{" + ",".join(fields) + "}"
 
         group = utils.get_h5_group(
             name,
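These hunks generalize column appending from tables to any `Struct` and tighten the type and size checks. A sketch of the `append_column` behavior they enforce, with hypothetical file and object names:

    import numpy as np
    import lgdo

    tbl = lgdo.Table(col_dict={"a": lgdo.Array(np.arange(10))})
    lgdo.write(tbl, "tbl", "data.lh5", wo_mode="overwrite_file")

    # appending succeeds only if the on-disk object has the same LGDO type
    # and, for tables, the same number of rows; otherwise LH5EncodeError
    new_col = lgdo.Table(col_dict={"b": lgdo.Array(np.ones(10))})
    lgdo.write(new_col, "tbl", "data.lh5", wo_mode="append_column")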
lgdo/lh5/concat.py
CHANGED
@@ -76,7 +76,7 @@ def _get_lgdos(file, obj_list):
             continue
 
         # read as little as possible
-        obj
+        obj = store.read(current, h5f0, n_rows=1)
         if isinstance(obj, (Table, Array, VectorOfVectors)):
             lgdos.append(current)
 
@@ -139,12 +139,6 @@ def _remove_nested_fields(lgdos: dict, obj_list: list):
         _inplace_table_filter(key, val, obj_list)
 
 
-def _slice(obj, n_rows):
-    ak_obj = obj.view_as("ak")[:n_rows]
-    obj_type = type(obj)
-    return obj_type(ak_obj)
-
-
 def lh5concat(
     lh5_files: list,
     output: str,
@@ -186,8 +180,8 @@ def lh5concat(
     # loop over lgdo objects
     for lgdo in lgdos:
        # iterate over the files
-        for lh5_obj
-            data = {lgdo:
+        for lh5_obj in LH5Iterator(lh5_files, lgdo):
+            data = {lgdo: lh5_obj}
 
             # remove the nested fields
             _remove_nested_fields(data, obj_list)
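With `_slice` removed, `lh5concat` now streams each object through `LH5Iterator` instead of slicing a one-shot read, so memory use is bounded by the iterator buffer. A usage sketch, with hypothetical file names:

    from lgdo.lh5.concat import lh5concat

    # concatenate the shared objects of several LH5 files into one output
    lh5concat(
        lh5_files=["run0.lh5", "run1.lh5"],
        output="merged.lh5",
    )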
lgdo/lh5/core.py
CHANGED
@@ -4,6 +4,7 @@ import bisect
 import inspect
 import sys
 from collections.abc import Mapping, Sequence
+from contextlib import suppress
 from typing import Any
 
 import h5py
@@ -92,8 +93,7 @@ def read(
         will be set to ``True``, while the rest will default to ``False``.
     obj_buf
         Read directly into memory provided in `obj_buf`. Note: the buffer
-        will be
-        buffer length, send in ``n_rows = len(obj_buf)``.
+        will be resized to accommodate the data retrieved.
     obj_buf_start
         Start location in ``obj_buf`` for read. For concatenating data to
         array-like objects.
@@ -106,25 +106,25 @@
 
     Returns
     -------
-
-
-        successfully read out. Essential for arrays when the amount of data
-        is smaller than the object buffer. For scalars and structs
-        `n_rows_read` will be``1``. For tables it is redundant with
-        ``table.loc``. If `obj_buf` is ``None``, only `object` is returned.
+    object
+        the read-out object
     """
     if isinstance(lh5_file, h5py.File):
         lh5_obj = lh5_file[name]
     elif isinstance(lh5_file, str):
         lh5_file = h5py.File(lh5_file, mode="r", locking=locking)
-
+        try:
+            lh5_obj = lh5_file[name]
+        except KeyError as ke:
+            err = f"Object {name} not found in file {lh5_file.filename}"
+            raise KeyError(err) from ke
     else:
-
-
-
-
+        if obj_buf is not None:
+            obj_buf.resize(obj_buf_start)
+        else:
+            obj_buf_start = 0
 
-        for i, h5f in enumerate(
+        for i, h5f in enumerate(lh5_file):
            if (
                 isinstance(idx, (list, tuple))
                 and len(idx) > 0
@@ -146,33 +146,26 @@
                 idx = np.array(idx[0])[n_rows_to_read_i:] - n_rows_i
             else:
                 idx_i = None
-            n_rows_i = n_rows - n_rows_read
 
-
+            obj_buf_start_i = len(obj_buf) if obj_buf else 0
+            n_rows_i = n_rows - (obj_buf_start_i - obj_buf_start)
+
+            obj_buf = read(
                 name,
                 h5f,
-                start_row,
+                start_row if i == 0 else 0,
                 n_rows_i,
                 idx_i,
                 use_h5idx,
                 field_mask,
                 obj_buf,
-
+                obj_buf_start_i,
                 decompress,
             )
-            if isinstance(obj_ret, tuple):
-                obj_buf, n_rows_read_i = obj_ret
-                obj_buf_is_new = True
-            else:
-                obj_buf = obj_ret
-                n_rows_read_i = len(obj_buf)
 
-
-
-
-            start_row = 0
-            obj_buf_start += n_rows_read_i
-        return obj_buf if obj_buf_is_new else (obj_buf, n_rows_read)
+            if obj_buf is None or (len(obj_buf) - obj_buf_start) >= n_rows:
+                return obj_buf
+        return obj_buf
 
     if isinstance(idx, (list, tuple)) and len(idx) > 0 and not np.isscalar(idx[0]):
         idx = idx[0]
@@ -192,8 +185,10 @@
         obj_buf_start=obj_buf_start,
         decompress=decompress,
     )
+    with suppress(AttributeError):
+        obj.resize(obj_buf_start + n_rows_read)
 
-    return obj
+    return obj
 
 
 def write(
@@ -273,11 +268,13 @@
           end of array is the same as ``append``.
         - ``overwrite_file`` or ``of``: delete file if present prior to
          writing to it. `write_start` should be 0 (its ignored).
-        - ``append_column`` or ``ac``: append columns from an
-          :class:`~.lgdo.
-          :class:`~.lgdo.table.Table`
-
-
+        - ``append_column`` or ``ac``: append fields/columns from an
+          :class:`~.lgdo.struct.Struct` `obj` (and derived types such as
+          :class:`~.lgdo.table.Table`) only if there is an existing
+          :class:`~.lgdo.struct.Struct` in the `lh5_file` with the same `name`.
+          If there are matching fields, it errors out. If appending to a
+          ``Table`` and the size of the new column is different from the size
+          of the existing table, it errors out.
     write_start
         row in the output file (if already existing) to start overwriting
         from.
lgdo/lh5/iterator.py
CHANGED
@@ -24,7 +24,8 @@ class LH5Iterator(typing.Iterator):
 
     This can be used as an iterator:
 
-
+
+    >>> for lh5_obj in LH5Iterator(...):
     >>>     # do the thing!
 
     This is intended for if you are reading a large quantity of data. This
@@ -42,6 +43,8 @@ class LH5Iterator(typing.Iterator):
     In addition to accessing requested data via ``lh5_obj``, several
     properties exist to tell you where that data came from:
 
+    - lh5_it.current_i_entry: get the index within the entry list of the
+      first entry that is currently read
     - lh5_it.current_local_entries: get the entry numbers relative to the
       file the data came from
     - lh5_it.current_global_entries: get the entry number relative to the
@@ -49,9 +52,9 @@ class LH5Iterator(typing.Iterator):
     - lh5_it.current_files: get the file name corresponding to each entry
     - lh5_it.current_groups: get the group name corresponding to each entry
 
-    This class can also be used
+    This class can also be used for random access:
 
-    >>> lh5_obj
+    >>> lh5_obj = lh5_it.read(i_entry)
 
     to read the block of entries starting at i_entry. In case of multiple files
     or the use of an event selection, i_entry refers to a global event index
@@ -65,6 +68,8 @@ class LH5Iterator(typing.Iterator):
         base_path: str = "",
         entry_list: list[int] | list[list[int]] | None = None,
         entry_mask: list[bool] | list[list[bool]] | None = None,
+        i_start: int = 0,
+        n_entries: int | None = None,
         field_mask: dict[str, bool] | list[str] | tuple[str] | None = None,
         buffer_len: int = "100*MB",
         file_cache: int = 10,
@@ -89,6 +94,10 @@ class LH5Iterator(typing.Iterator):
         entry_mask
             mask of entries to read. If a list of arrays is provided, expect
             one for each file. Ignore if a selection list is provided.
+        i_start
+            index of first entry to start at when iterating
+        n_entries
+            number of entries to read before terminating iteration
         field_mask
             mask of which fields to read. See :meth:`LH5Store.read` for
             more details.
@@ -183,7 +192,8 @@ class LH5Iterator(typing.Iterator):
             msg = f"can't open any files from {lh5_files}"
             raise RuntimeError(msg)
 
-        self.
+        self.i_start = i_start
+        self.n_entries = n_entries
         self.current_i_entry = 0
         self.next_i_entry = 0
 
@@ -317,14 +327,21 @@ class LH5Iterator(typing.Iterator):
         )
         return self.global_entry_list
 
-    def read(self, i_entry: int) ->
-        "
-
-
-
+    def read(self, i_entry: int, n_entries: int | None = None) -> LGDO:
+        "Read the nextlocal chunk of events, starting at entry."
+        self.lh5_buffer.resize(0)
+
+        if n_entries is None:
+            n_entries = self.buffer_len
+        elif n_entries == 0:
+            return self.lh5_buffer
+        elif n_entries > self.buffer_len:
+            msg = "n_entries cannot be larger than buffer_len"
+            raise ValueError(msg)
 
         # if file hasn't been opened yet, search through files
         # sequentially until we find the right one
+        i_file = np.searchsorted(self.entry_map, i_entry, "right")
         if i_file < len(self.lh5_files) and self.entry_map[i_file] == np.iinfo("q").max:
             while i_file < len(self.lh5_files) and i_entry >= self._get_file_cumentries(
                 i_file
@@ -332,10 +349,10 @@ class LH5Iterator(typing.Iterator):
                 i_file += 1
 
         if i_file == len(self.lh5_files):
-            return
+            return self.lh5_buffer
         local_i_entry = i_entry - self._get_file_cumentries(i_file - 1)
 
-        while self.
+        while len(self.lh5_buffer) < n_entries and i_file < len(self.file_map):
             # Loop through files
             local_idx = self.get_file_entrylist(i_file)
             if local_idx is not None and len(local_idx) == 0:
@@ -344,18 +361,17 @@ class LH5Iterator(typing.Iterator):
                 continue
 
             i_local = local_i_entry if local_idx is None else local_idx[local_i_entry]
-            self.lh5_buffer
+            self.lh5_buffer = self.lh5_st.read(
                 self.groups[i_file],
                 self.lh5_files[i_file],
                 start_row=i_local,
-                n_rows=
+                n_rows=n_entries - len(self.lh5_buffer),
                 idx=local_idx,
                 field_mask=self.field_mask,
                 obj_buf=self.lh5_buffer,
-                obj_buf_start=self.
+                obj_buf_start=len(self.lh5_buffer),
             )
 
-            self.n_rows += n_rows
             i_file += 1
             local_i_entry = 0
 
@@ -364,7 +380,7 @@ class LH5Iterator(typing.Iterator):
         if self.friend is not None:
             self.friend.read(i_entry)
 
-        return
+        return self.lh5_buffer
 
     def reset_field_mask(self, mask):
         """Replaces the field mask of this iterator and any friends with mask"""
@@ -375,7 +391,7 @@
     @property
     def current_local_entries(self) -> NDArray[int]:
         """Return list of local file entries in buffer"""
-        cur_entries = np.zeros(self.
+        cur_entries = np.zeros(len(self.lh5_buffer), dtype="int32")
         i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
         file_start = self._get_file_cumentries(i_file - 1)
         i_local = self.current_i_entry - file_start
@@ -402,7 +418,7 @@
     @property
     def current_global_entries(self) -> NDArray[int]:
         """Return list of local file entries in buffer"""
-        cur_entries = np.zeros(self.
+        cur_entries = np.zeros(len(self.lh5_buffer), dtype="int32")
         i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
         file_start = self._get_file_cumentries(i_file - 1)
         i_local = self.current_i_entry - file_start
@@ -433,7 +449,7 @@
     @property
     def current_files(self) -> NDArray[str]:
         """Return list of file names for entries in buffer"""
-        cur_files = np.zeros(self.
+        cur_files = np.zeros(len(self.lh5_buffer), dtype=object)
         i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
         file_start = self._get_file_cumentries(i_file - 1)
         i_local = self.current_i_entry - file_start
@@ -455,7 +471,7 @@
     @property
     def current_groups(self) -> NDArray[str]:
         """Return list of group names for entries in buffer"""
-        cur_groups = np.zeros(self.
+        cur_groups = np.zeros(len(self.lh5_buffer), dtype=object)
         i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
         file_start = self._get_file_cumentries(i_file - 1)
         i_local = self.current_i_entry - file_start
@@ -485,14 +501,19 @@
     def __iter__(self) -> typing.Iterator:
         """Loop through entries in blocks of size buffer_len."""
         self.current_i_entry = 0
-        self.next_i_entry =
+        self.next_i_entry = self.i_start
         return self
 
     def __next__(self) -> tuple[LGDO, int, int]:
-        """Read next buffer_len entries and return lh5_table
-
-
-
+        """Read next buffer_len entries and return lh5_table and iterator entry."""
+        n_entries = self.n_entries
+        if n_entries is not None:
+            n_entries = min(
+                self.buffer_len, n_entries + self.i_start - self.next_i_entry
+            )
+
+        buf = self.read(self.next_i_entry, n_entries)
+        if len(buf) == 0:
             raise StopIteration
-
+        self.next_i_entry = self.current_i_entry + len(buf)
+        return buf