PyPI - legend-pydataobj - Versions diffs - 1.11.7__py3-none-any.whl → 1.12.0a2__py3-none-any.whl - Mend

legend-pydataobj 1.11.7py3-none-any.whl → 1.12.0a2py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

{legend_pydataobj-1.11.7.dist-info → legend_pydataobj-1.12.0a2.dist-info}/METADATA +1 -1
{legend_pydataobj-1.11.7.dist-info → legend_pydataobj-1.12.0a2.dist-info}/RECORD +23 -24
lgdo/__init__.py +5 -4
lgdo/_version.py +2 -2
lgdo/lh5/__init__.py +1 -3
lgdo/lh5/_serializers/read/composite.py +1 -3
lgdo/lh5/concat.py +3 -9
lgdo/lh5/core.py +21 -30
lgdo/lh5/iterator.py +48 -27
lgdo/lh5/store.py +15 -68
lgdo/lh5/tools.py +0 -111
lgdo/types/array.py +84 -15
lgdo/types/encoded.py +25 -20
lgdo/types/histogram.py +1 -1
lgdo/types/lgdo.py +50 -0
lgdo/types/table.py +49 -28
lgdo/types/vectorofvectors.py +72 -76
lgdo/types/vovutils.py +14 -4
lgdo/types/waveformtable.py +19 -21
lgdo/lh5_store.py +0 -284
{legend_pydataobj-1.11.7.dist-info → legend_pydataobj-1.12.0a2.dist-info}/WHEEL +0 -0
{legend_pydataobj-1.11.7.dist-info → legend_pydataobj-1.12.0a2.dist-info}/entry_points.txt +0 -0
{legend_pydataobj-1.11.7.dist-info → legend_pydataobj-1.12.0a2.dist-info}/licenses/LICENSE +0 -0
{legend_pydataobj-1.11.7.dist-info → legend_pydataobj-1.12.0a2.dist-info}/top_level.txt +0 -0

{legend_pydataobj-1.11.7.dist-info → legend_pydataobj-1.12.0a2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: legend_pydataobj
-Version: 1.11.7
+Version: 1.12.0a2
 Summary: LEGEND Python Data Objects
 Author: The LEGEND Collaboration
 Maintainer: The LEGEND Collaboration

{legend_pydataobj-1.11.7.dist-info → legend_pydataobj-1.12.0a2.dist-info}/RECORD RENAMED Viewed

@@ -1,9 +1,8 @@
-legend_pydataobj-1.11.7.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-lgdo/__init__.py,sha256=QMYK9HhoMi0pbahPN8mPD18gyTxscFgo7QKfCxVhy-0,3196
-lgdo/_version.py,sha256=WYo6AtimYOvXEEB_DEJYUqS-yeVHGFoR5t7JM_9dSwo,513
+legend_pydataobj-1.12.0a2.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+lgdo/__init__.py,sha256=fkRv79kdtBasw31gPVK9SdLQ2vEEajTV2t3UPDvFg9o,3206
+lgdo/_version.py,sha256=JnfDM_d42edyo74E887XCyb9fjk-f5Vnz-5nNPPYBpo,515
 lgdo/cli.py,sha256=s_EWTBWW76l7zWb6gaTSTjiT-0RzzcYEmjeFEQCVxfk,4647
 lgdo/lgdo_utils.py,sha256=6a2YWEwpyEMXlAyTHZMO01aqxy6SxJzPZkGNWKNWuS0,2567
-lgdo/lh5_store.py,sha256=5BzbJA9sLcqjp8bJDc2olwOiw0VS6rmfg3cfh1kQkRY,8512
 lgdo/logging.py,sha256=82wIOj7l7xr3WYyeHdpSXbbjzHJsy-uRyKYUYx2vMfQ,1003
 lgdo/units.py,sha256=VQYME86_ev9S7Fq8RyCOQNqYr29MphTTYemmEouZafk,161
 lgdo/utils.py,sha256=WRTmXnaQ-h2hVxwJ27qiOigdsD3DHcaDrdDjvupCuZU,3940
@@ -13,19 +12,19 @@ lgdo/compression/generic.py,sha256=tF3UhLJbUDcovLxpIzgQRxFSjZ5Fz3uDRy9kI4mFntQ,2
 lgdo/compression/radware.py,sha256=GcNTtjuyL7VBBqziUBmSqNXuhqy1bJJgvcyvyumPtrc,23839
 lgdo/compression/utils.py,sha256=W2RkBrxPpXlat84dnU9Ad7d_tTws0irtGl7O1dNWjnk,1140
 lgdo/compression/varlen.py,sha256=6ZZUItyoOfygDdE0DyoISeFZfqdbH6xl7T0eclfarzg,15127
-lgdo/lh5/__init__.py,sha256=y1XE_mpFWwamrl7WVjAVSVB25X4PrEfdVXSneSQEmlQ,825
-lgdo/lh5/concat.py,sha256=5nO7dNSb0UEP9rZiWGTKH5Cfwsm5LSm3tBJM4Kd70u0,6336
-lgdo/lh5/core.py,sha256=__-A6Abctzfwfo4-xJi68xs2e4vfzONEQTJVrUCOw-I,13922
+lgdo/lh5/__init__.py,sha256=UTzKGmpgFoHwVB_yNULvJsHD_uQQGl-R87l-3QBkh7w,773
+lgdo/lh5/concat.py,sha256=BZCgK7TWPKK8fMmha8K83d3bC31FVO1b5LOW7x-Ru1s,6186
+lgdo/lh5/core.py,sha256=GjosZGUp4GSO5FtWV9eXUt_6DGU_OwJXODlj5K1j93M,13320
 lgdo/lh5/datatype.py,sha256=O_7BqOlX8PFMyG0ppkfUT5aps5HEqX0bpuKcJO3jhu0,1691
 lgdo/lh5/exceptions.py,sha256=3kj8avXl4eBGvebl3LG12gJEmw91W0T8PYR0AfvUAyM,1211
-lgdo/lh5/iterator.py,sha256=ZaBBnmuNIjinwO0JUY55wLxX8Om9rVRRzXBC5uHmSKM,19772
-lgdo/lh5/store.py,sha256=3wAaQDd1Zmo0_bQ9DbB-FbKS4Uy_Tb642qKHXtZpSw4,10643
-lgdo/lh5/tools.py,sha256=T9CgHA8A3_tVBMtiNJ6hATQKhdqI61m3cX4p2wGKc6c,9937
+lgdo/lh5/iterator.py,sha256=1ob9B7Bf3ioGCtZkUZoL6ibTxAwLf4ld8_33ghVVEa4,20498
+lgdo/lh5/store.py,sha256=MYbMt-Mc7izELxuyLlSrrYrylCIzxc2CLzZYIVbZ33w,8455
+lgdo/lh5/tools.py,sha256=drtJWHY82wCFuFr6LVVnm2AQgs_wZuFmAvyOB4tcOHs,6431
 lgdo/lh5/utils.py,sha256=ioz8DlyXZsejwnU2qYdIccdHcF12H62jgLkZsiDOLSM,6243
 lgdo/lh5/_serializers/__init__.py,sha256=eZzxMp1SeZWG0PkEXUiCz3XyprQ8EmelHUmJogC8xYE,1263
 lgdo/lh5/_serializers/read/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lgdo/lh5/_serializers/read/array.py,sha256=uWfMCihfAmW2DE2ewip2qCK_kvQC_mb2zvOv26uzijc,1000
-lgdo/lh5/_serializers/read/composite.py,sha256=yTm5dfTgkIL7eG9iZXxhdiRhG04cQLd_hybP4wmxCJE,11809
+lgdo/lh5/_serializers/read/composite.py,sha256=UvkZHEhf0V7SFLxzF52eyP68hU0guGOLqosrfmIfeys,11729
 lgdo/lh5/_serializers/read/encoded.py,sha256=Q98c08d8LkZq2AlY4rThYECVaEqwbv4T2Urn7TGnsyE,4130
 lgdo/lh5/_serializers/read/ndarray.py,sha256=lFCXD6bSzmMOH7cVmvRYXakkfMCI8EoqTPNONRJ1F0s,3690
 lgdo/lh5/_serializers/read/scalar.py,sha256=kwhWm1T91pXf86CqtUUD8_qheSR92gXZrQVtssV5YCg,922
@@ -37,20 +36,20 @@ lgdo/lh5/_serializers/write/composite.py,sha256=I6lH0nWFIpAfZyG4-0rLxzg3mfazZ_FE
 lgdo/lh5/_serializers/write/scalar.py,sha256=JPt_fcdTKOSFp5hfJdcKIfK4hxhcD8vhOlvDF-7btQ8,763
 lgdo/lh5/_serializers/write/vector_of_vectors.py,sha256=puGQX9XF5P_5DVbm_Cc6TvPrsDywgBLSYtkqFNltbB4,3493
 lgdo/types/__init__.py,sha256=DNfOErPiAZg-7Gygkp6ZKAi20Yrm1mfderZHvKo1Y4s,821
-lgdo/types/array.py,sha256=sUxh1CNCaefrnybt5qdjmmMpVQa_RqFxUv1tJ_pyBbc,6537
+lgdo/types/array.py,sha256=vxViJScqKw4zGUrrIOuuU_9Y0oTfOkEEhs0TOyUYjwI,9284
 lgdo/types/arrayofequalsizedarrays.py,sha256=DOGJiTmc1QCdm7vLbE6uIRXoMPtt8uuCfmwQawgWf5s,4949
-lgdo/types/encoded.py,sha256=JW4U5ow7KLMzhKnmhdnxbC3SZJAs4bOEDZWKG4KY1uU,15293
+lgdo/types/encoded.py,sha256=_e8u_BPfpjJbLnEdyTo9QG3kbNsGj0BN4gjdj3L1ndw,15640
 lgdo/types/fixedsizearray.py,sha256=7RjUwTz1bW0pcrdy27JlfrXPAuOU89Kj7pOuSUCojK8,1527
-lgdo/types/histogram.py,sha256=y6j2VDuGYYnLy7WI4J90ApS0PAwic4kCpouZPX09Nus,19974
-lgdo/types/lgdo.py,sha256=RQ2P70N7IWMBDnLLuJI3sm6zQTIKyOMSsKZtBNzmE90,2928
+lgdo/types/histogram.py,sha256=Jz1lLH56BfYnmcUhxUHK1h2wLDQ0Abgyd-6LznU-3-k,19979
+lgdo/types/lgdo.py,sha256=21YNtJCHnSO3M60rjsAdbMO5crDjL_0BtuFpudZ2xvU,4500
 lgdo/types/scalar.py,sha256=c5Es2vyDqyWTPV6mujzfIzMpC1jNWkEIcvYyWQUxH3Q,1933
 lgdo/types/struct.py,sha256=Q0OWLVd4B0ciLb8t6VsxU3MPbmGLZ7WfQNno1lSQS0Q,4918
-lgdo/types/table.py,sha256=VIHQOPXJHJgiCjMMb_p7EdbcCqLFSObHMdHSxC1Dm5Y,19212
-lgdo/types/vectorofvectors.py,sha256=K8w7CZou857I9YGkeOe2uYB20gbHl4OV9xhnnJPNOjc,24665
-lgdo/types/vovutils.py,sha256=7BWPP0BSj-92ifbCIUBcfqxG5-TS8uxujTyJJuDFI04,10302
-lgdo/types/waveformtable.py,sha256=f2tS4f1OEoYaTM5ldCX9zmw8iSISCT3t3wS1SrPdu_o,9901
-legend_pydataobj-1.11.7.dist-info/METADATA,sha256=Z0-UFMzWILag78U1HkNpbYwKDb_JZkZ8kZLtW4T8gw0,44443
-legend_pydataobj-1.11.7.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-legend_pydataobj-1.11.7.dist-info/entry_points.txt,sha256=0KWfnwbuwhNn0vPUqARukjp04Ca6lzfZBSirouRmk7I,76
-legend_pydataobj-1.11.7.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
-legend_pydataobj-1.11.7.dist-info/RECORD,,
+lgdo/types/table.py,sha256=FkWesoEA9bmGGSW8Ewig1Zs77ffUoR_nggfYSmkWpjU,20079
+lgdo/types/vectorofvectors.py,sha256=-5m3g5w03nqs__Uv4cO36A_7_h_4mJhFpIhzJh3Y5D0,24855
+lgdo/types/vovutils.py,sha256=LW3ZcwECxVYxxcFadAtY3nnK-9-rk8Xbg_m8hY30lo4,10708
+lgdo/types/waveformtable.py,sha256=9S_NMg894NZTGt2pLuskwH4-zQ5EbLnzWI6FVui6fXE,9827
+legend_pydataobj-1.12.0a2.dist-info/METADATA,sha256=71-hhjEgQZ9NqNS7FQkFYPALw6VZJf6vDMtCGMrdohE,44445
+legend_pydataobj-1.12.0a2.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+legend_pydataobj-1.12.0a2.dist-info/entry_points.txt,sha256=0KWfnwbuwhNn0vPUqARukjp04Ca6lzfZBSirouRmk7I,76
+legend_pydataobj-1.12.0a2.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
+legend_pydataobj-1.12.0a2.dist-info/RECORD,,

lgdo/__init__.py CHANGED Viewed

@@ -45,7 +45,7 @@ browsed easily in python like any `HDF5 <https://www.hdfgroup.org>`_ file using
 from __future__ import annotations
 from ._version import version as __version__
-from .lh5_store import LH5Iterator, LH5Store, load_dfs, load_nda, ls, show
+from .lh5 import LH5Iterator, ls, read, read_as, read_n_rows, show, write
 from .types import (
     LGDO,
     Array,
@@ -69,7 +69,6 @@ __all__ = [
     "FixedSizeArray",
     "Histogram",
     "LH5Iterator",
-    "LH5Store",
     "Scalar",
     "Struct",
     "Table",
@@ -77,8 +76,10 @@ __all__ = [
     "VectorOfVectors",
     "WaveformTable",
     "__version__",
-    "load_dfs",
-    "load_nda",
     "ls",
+    "read",
+    "read_as",
+    "read_n_rows",
     "show",
+    "write",
 ]

lgdo/_version.py CHANGED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '1.11.7'
-__version_tuple__ = version_tuple = (1, 11, 7)
+__version__ = version = '1.12.0a2'
+__version_tuple__ = version_tuple = (1, 12, 0)

lgdo/lh5/__init__.py CHANGED Viewed

@@ -11,7 +11,7 @@ from ._serializers.write.array import DEFAULT_HDF5_SETTINGS
 from .core import read, read_as, write
 from .iterator import LH5Iterator
 from .store import LH5Store
-from .tools import load_dfs, load_nda, ls, show
+from .tools import ls, show
 from .utils import read_n_rows
 __all__ = [
@@ -19,8 +19,6 @@ __all__ = [
     "LH5Iterator",
     "LH5Store",
     "concat",
-    "load_dfs",
-    "load_nda",
     "ls",
     "read",
     "read_as",

lgdo/lh5/_serializers/read/composite.py CHANGED Viewed

@@ -353,15 +353,13 @@ def _h5_read_table(
             table = Table(col_dict=col_dict, attrs=attrs)
         # set (write) loc to end of tree
-        table.loc = n_rows_read
+        table.resize(do_warn=True)
         return table, n_rows_read
     # We have read all fields into the object buffer. Run
     # checks: All columns should be the same size. So update
     # table's size as necessary, warn if any mismatches are found
     obj_buf.resize(do_warn=True)
-    # set (write) loc to end of tree
-    obj_buf.loc = obj_buf_start + n_rows_read
     # check attributes
     utils.check_obj_buf_attrs(obj_buf.attrs, attrs, fname, oname)

lgdo/lh5/concat.py CHANGED Viewed

@@ -76,7 +76,7 @@ def _get_lgdos(file, obj_list):
                 continue
             # read as little as possible
-            obj, _ = store.read(current, h5f0, n_rows=1)
+            obj = store.read(current, h5f0, n_rows=1)
             if isinstance(obj, (Table, Array, VectorOfVectors)):
                 lgdos.append(current)
@@ -139,12 +139,6 @@ def _remove_nested_fields(lgdos: dict, obj_list: list):
         _inplace_table_filter(key, val, obj_list)
-def _slice(obj, n_rows):
-    ak_obj = obj.view_as("ak")[:n_rows]
-    obj_type = type(obj)
-    return obj_type(ak_obj)
 def lh5concat(
     lh5_files: list,
     output: str,
@@ -186,8 +180,8 @@ def lh5concat(
     # loop over lgdo objects
     for lgdo in lgdos:
         # iterate over the files
-        for lh5_obj, _, n_rows in LH5Iterator(lh5_files, lgdo):
-            data = {lgdo: _slice(lh5_obj, n_rows)}
+        for lh5_obj in LH5Iterator(lh5_files, lgdo):
+            data = {lgdo: lh5_obj}
             # remove the nested fields
             _remove_nested_fields(data, obj_list)

lgdo/lh5/core.py CHANGED Viewed

@@ -4,6 +4,7 @@ import bisect
 import inspect
 import sys
 from collections.abc import Mapping, Sequence
+from contextlib import suppress
 from typing import Any
 import h5py
@@ -92,8 +93,7 @@ def read(
         will be set to ``True``, while the rest will default to ``False``.
     obj_buf
         Read directly into memory provided in `obj_buf`. Note: the buffer
-        will be expanded to accommodate the data requested. To maintain the
-        buffer length, send in ``n_rows = len(obj_buf)``.
+        will be resized to accommodate the data retrieved.
     obj_buf_start
         Start location in ``obj_buf`` for read. For concatenating data to
         array-like objects.
@@ -106,12 +106,8 @@ def read(
     Returns
     -------
-    (object, n_rows_read)
-        `object` is the read-out object `n_rows_read` is the number of rows
-        successfully read out. Essential for arrays when the amount of data
-        is smaller than the object buffer.  For scalars and structs
-        `n_rows_read` will be``1``. For tables it is redundant with
-        ``table.loc``. If `obj_buf` is ``None``, only `object` is returned.
+    object
+        the read-out object
     """
     if isinstance(lh5_file, h5py.File):
         lh5_obj = lh5_file[name]
@@ -119,12 +115,12 @@ def read(
         lh5_file = h5py.File(lh5_file, mode="r", locking=locking)
         lh5_obj = lh5_file[name]
     else:
-        lh5_files = list(lh5_file)
-        n_rows_read = 0
-        obj_buf_is_new = False
+        if obj_buf is not None:
+            obj_buf.resize(obj_buf_start)
+        else:
+            obj_buf_start = 0
-        for i, h5f in enumerate(lh5_files):
+        for i, h5f in enumerate(lh5_file):
             if (
                 isinstance(idx, (list, tuple))
                 and len(idx) > 0
@@ -146,33 +142,26 @@ def read(
                 idx = np.array(idx[0])[n_rows_to_read_i:] - n_rows_i
             else:
                 idx_i = None
-            n_rows_i = n_rows - n_rows_read
-            obj_ret = read(
+            obj_buf_start_i = len(obj_buf) if obj_buf else 0
+            n_rows_i = n_rows - (obj_buf_start_i - obj_buf_start)
+            obj_buf = read(
                 name,
                 h5f,
-                start_row,
+                start_row if i == 0 else 0,
                 n_rows_i,
                 idx_i,
                 use_h5idx,
                 field_mask,
                 obj_buf,
-                obj_buf_start,
+                obj_buf_start_i,
                 decompress,
             )
-            if isinstance(obj_ret, tuple):
-                obj_buf, n_rows_read_i = obj_ret
-                obj_buf_is_new = True
-            else:
-                obj_buf = obj_ret
-                n_rows_read_i = len(obj_buf)
-            n_rows_read += n_rows_read_i
-            if n_rows_read >= n_rows or obj_buf is None:
-                return obj_buf, n_rows_read
-            start_row = 0
-            obj_buf_start += n_rows_read_i
-        return obj_buf if obj_buf_is_new else (obj_buf, n_rows_read)
+            if obj_buf is None or (len(obj_buf) - obj_buf_start) >= n_rows:
+                return obj_buf
+        return obj_buf
     if isinstance(idx, (list, tuple)) and len(idx) > 0 and not np.isscalar(idx[0]):
         idx = idx[0]
@@ -192,8 +181,10 @@ def read(
         obj_buf_start=obj_buf_start,
         decompress=decompress,
     )
+    with suppress(AttributeError):
+        obj.resize(obj_buf_start + n_rows_read)
-    return obj if obj_buf is None else (obj, n_rows_read)
+    return obj
 def write(

lgdo/lh5/iterator.py CHANGED Viewed

@@ -24,7 +24,8 @@ class LH5Iterator(typing.Iterator):
     This can be used as an iterator:
-    >>> for lh5_obj, i_entry, n_rows in LH5Iterator(...):
+    >>> for lh5_obj in LH5Iterator(...):
     >>>    # do the thing!
     This is intended for if you are reading a large quantity of data. This
@@ -42,6 +43,8 @@ class LH5Iterator(typing.Iterator):
     In addition to accessing requested data via ``lh5_obj``, several
     properties exist to tell you where that data came from:
+    - lh5_it.current_i_entry: get the index within the entry list of the
+      first entry that is currently read
     - lh5_it.current_local_entries: get the entry numbers relative to the
       file the data came from
     - lh5_it.current_global_entries: get the entry number relative to the
@@ -49,9 +52,9 @@ class LH5Iterator(typing.Iterator):
     - lh5_it.current_files: get the file name corresponding to each entry
     - lh5_it.current_groups: get the group name corresponding to each entry
-    This class can also be used either for random access:
+    This class can also be used for random access:
-    >>> lh5_obj, n_rows = lh5_it.read(i_entry)
+    >>> lh5_obj = lh5_it.read(i_entry)
     to read the block of entries starting at i_entry. In case of multiple files
     or the use of an event selection, i_entry refers to a global event index
@@ -65,6 +68,8 @@ class LH5Iterator(typing.Iterator):
         base_path: str = "",
         entry_list: list[int] | list[list[int]] | None = None,
         entry_mask: list[bool] | list[list[bool]] | None = None,
+        i_start: int = 0,
+        n_entries: int | None = None,
         field_mask: dict[str, bool] | list[str] | tuple[str] | None = None,
         buffer_len: int = "100*MB",
         file_cache: int = 10,
@@ -89,6 +94,10 @@ class LH5Iterator(typing.Iterator):
         entry_mask
             mask of entries to read. If a list of arrays is provided, expect
             one for each file. Ignore if a selection list is provided.
+        i_start
+            index of first entry to start at when iterating
+        n_entries
+            number of entries to read before terminating iteration
         field_mask
             mask of which fields to read. See :meth:`LH5Store.read` for
             more details.
@@ -183,7 +192,8 @@ class LH5Iterator(typing.Iterator):
             msg = f"can't open any files from {lh5_files}"
             raise RuntimeError(msg)
-        self.n_rows = 0
+        self.i_start = i_start
+        self.n_entries = n_entries
         self.current_i_entry = 0
         self.next_i_entry = 0
@@ -317,14 +327,21 @@ class LH5Iterator(typing.Iterator):
                 )
         return self.global_entry_list
-    def read(self, i_entry: int) -> tuple[LGDO, int]:
-        """Read the nextlocal chunk of events, starting at i_entry. Return the
-        LH5 buffer and number of rows read."""
-        self.n_rows = 0
-        i_file = np.searchsorted(self.entry_map, i_entry, "right")
+    def read(self, i_entry: int, n_entries: int | None = None) -> LGDO:
+        "Read the nextlocal chunk of events, starting at entry."
+        self.lh5_buffer.resize(0)
+        if n_entries is None:
+            n_entries = self.buffer_len
+        elif n_entries == 0:
+            return self.lh5_buffer
+        elif n_entries > self.buffer_len:
+            msg = "n_entries cannot be larger than buffer_len"
+            raise ValueError(msg)
         # if file hasn't been opened yet, search through files
         # sequentially until we find the right one
+        i_file = np.searchsorted(self.entry_map, i_entry, "right")
         if i_file < len(self.lh5_files) and self.entry_map[i_file] == np.iinfo("q").max:
             while i_file < len(self.lh5_files) and i_entry >= self._get_file_cumentries(
                 i_file
@@ -332,10 +349,10 @@ class LH5Iterator(typing.Iterator):
                 i_file += 1
         if i_file == len(self.lh5_files):
-            return (self.lh5_buffer, self.n_rows)
+            return self.lh5_buffer
         local_i_entry = i_entry - self._get_file_cumentries(i_file - 1)
-        while self.n_rows < self.buffer_len and i_file < len(self.file_map):
+        while len(self.lh5_buffer) < n_entries and i_file < len(self.file_map):
             # Loop through files
             local_idx = self.get_file_entrylist(i_file)
             if local_idx is not None and len(local_idx) == 0:
@@ -344,18 +361,17 @@ class LH5Iterator(typing.Iterator):
                 continue
             i_local = local_i_entry if local_idx is None else local_idx[local_i_entry]
-            self.lh5_buffer, n_rows = self.lh5_st.read(
+            self.lh5_buffer = self.lh5_st.read(
                 self.groups[i_file],
                 self.lh5_files[i_file],
                 start_row=i_local,
-                n_rows=self.buffer_len - self.n_rows,
+                n_rows=n_entries - len(self.lh5_buffer),
                 idx=local_idx,
                 field_mask=self.field_mask,
                 obj_buf=self.lh5_buffer,
-                obj_buf_start=self.n_rows,
+                obj_buf_start=len(self.lh5_buffer),
             )
-            self.n_rows += n_rows
             i_file += 1
             local_i_entry = 0
@@ -364,7 +380,7 @@ class LH5Iterator(typing.Iterator):
         if self.friend is not None:
             self.friend.read(i_entry)
-        return (self.lh5_buffer, self.n_rows)
+        return self.lh5_buffer
     def reset_field_mask(self, mask):
         """Replaces the field mask of this iterator and any friends with mask"""
@@ -375,7 +391,7 @@ class LH5Iterator(typing.Iterator):
     @property
     def current_local_entries(self) -> NDArray[int]:
         """Return list of local file entries in buffer"""
-        cur_entries = np.zeros(self.n_rows, dtype="int32")
+        cur_entries = np.zeros(len(self.lh5_buffer), dtype="int32")
         i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
         file_start = self._get_file_cumentries(i_file - 1)
         i_local = self.current_i_entry - file_start
@@ -402,7 +418,7 @@ class LH5Iterator(typing.Iterator):
     @property
     def current_global_entries(self) -> NDArray[int]:
         """Return list of local file entries in buffer"""
-        cur_entries = np.zeros(self.n_rows, dtype="int32")
+        cur_entries = np.zeros(len(self.lh5_buffer), dtype="int32")
         i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
         file_start = self._get_file_cumentries(i_file - 1)
         i_local = self.current_i_entry - file_start
@@ -433,7 +449,7 @@ class LH5Iterator(typing.Iterator):
     @property
     def current_files(self) -> NDArray[str]:
         """Return list of file names for entries in buffer"""
-        cur_files = np.zeros(self.n_rows, dtype=object)
+        cur_files = np.zeros(len(self.lh5_buffer), dtype=object)
         i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
         file_start = self._get_file_cumentries(i_file - 1)
         i_local = self.current_i_entry - file_start
@@ -455,7 +471,7 @@ class LH5Iterator(typing.Iterator):
     @property
     def current_groups(self) -> NDArray[str]:
         """Return list of group names for entries in buffer"""
-        cur_groups = np.zeros(self.n_rows, dtype=object)
+        cur_groups = np.zeros(len(self.lh5_buffer), dtype=object)
         i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
         file_start = self._get_file_cumentries(i_file - 1)
         i_local = self.current_i_entry - file_start
@@ -485,14 +501,19 @@ class LH5Iterator(typing.Iterator):
     def __iter__(self) -> typing.Iterator:
         """Loop through entries in blocks of size buffer_len."""
         self.current_i_entry = 0
-        self.next_i_entry = 0
+        self.next_i_entry = self.i_start
         return self
     def __next__(self) -> tuple[LGDO, int, int]:
-        """Read next buffer_len entries and return lh5_table, iterator entry
-        and n_rows read."""
-        buf, n_rows = self.read(self.next_i_entry)
-        self.next_i_entry = self.current_i_entry + n_rows
-        if n_rows == 0:
+        """Read next buffer_len entries and return lh5_table and iterator entry."""
+        n_entries = self.n_entries
+        if n_entries is not None:
+            n_entries = min(
+                self.buffer_len, n_entries + self.i_start - self.next_i_entry
+            )
+        buf = self.read(self.next_i_entry, n_entries)
+        if len(buf) == 0:
             raise StopIteration
-        return (buf, self.current_i_entry, n_rows)
+        self.next_i_entry = self.current_i_entry + len(buf)
+        return buf

lgdo/lh5/store.py CHANGED Viewed

@@ -5,7 +5,6 @@ HDF5 files.
 from __future__ import annotations
-import bisect
 import logging
 import os
 import sys
@@ -15,11 +14,11 @@ from inspect import signature
 from typing import Any
 import h5py
-import numpy as np
 from numpy.typing import ArrayLike
 from .. import types
 from . import _serializers, utils
+from .core import read
 log = logging.getLogger(__name__)
@@ -155,7 +154,7 @@ class LH5Store:
         """Returns an LH5 object appropriate for use as a pre-allocated buffer
         in a read loop. Sets size to `size` if object has a size.
         """
-        obj, n_rows = self.read(name, lh5_file, n_rows=0, field_mask=field_mask)
+        obj = self.read(name, lh5_file, n_rows=0, field_mask=field_mask)
         if hasattr(obj, "resize") and size is not None:
             obj.resize(new_size=size)
         return obj
@@ -182,72 +181,20 @@ class LH5Store:
         """
         # grab files from store
         if isinstance(lh5_file, (str, h5py.File)):
-            lh5_obj = self.gimme_file(lh5_file, "r", **file_kwargs)[name]
+            h5f = self.gimme_file(lh5_file, "r", **file_kwargs)
         else:
-            lh5_files = list(lh5_file)
-            n_rows_read = 0
-            for i, h5f in enumerate(lh5_files):
-                if (
-                    isinstance(idx, (list, tuple))
-                    and len(idx) > 0
-                    and not np.isscalar(idx[0])
-                ):
-                    # a list of lists: must be one per file
-                    idx_i = idx[i]
-                elif idx is not None:
-                    # make idx a proper tuple if it's not one already
-                    if not (isinstance(idx, tuple) and len(idx) == 1):
-                        idx = (idx,)
-                    # idx is a long continuous array
-                    n_rows_i = utils.read_n_rows(name, h5f)
-                    # find the length of the subset of idx that contains indices
-                    # that are less than n_rows_i
-                    n_rows_to_read_i = bisect.bisect_left(idx[0], n_rows_i)
-                    # now split idx into idx_i and the remainder
-                    idx_i = np.array(idx[0])[:n_rows_to_read_i]
-                    idx = np.array(idx[0])[n_rows_to_read_i:] - n_rows_i
-                else:
-                    idx_i = None
-                n_rows_i = n_rows - n_rows_read
-                obj_buf, n_rows_read_i = self.read(
-                    name,
-                    h5f,
-                    start_row,
-                    n_rows_i,
-                    idx_i,
-                    use_h5idx,
-                    field_mask,
-                    obj_buf,
-                    obj_buf_start,
-                    decompress,
-                )
-                n_rows_read += n_rows_read_i
-                if n_rows_read >= n_rows or obj_buf is None:
-                    return obj_buf, n_rows_read
-                start_row = 0
-                obj_buf_start += n_rows_read_i
-            return obj_buf, n_rows_read
-        if isinstance(idx, (list, tuple)) and len(idx) > 0 and not np.isscalar(idx[0]):
-            idx = idx[0]
-        if isinstance(idx, np.ndarray) and idx.dtype == np.dtype("?"):
-            idx = np.where(idx)[0]
-        return _serializers._h5_read_lgdo(
-            lh5_obj.id,
-            lh5_obj.file.filename,
-            lh5_obj.name,
-            start_row=start_row,
-            n_rows=n_rows,
-            idx=idx,
-            use_h5idx=use_h5idx,
-            field_mask=field_mask,
-            obj_buf=obj_buf,
-            obj_buf_start=obj_buf_start,
-            decompress=decompress,
+            h5f = [self.gimme_file(f, "r", **file_kwargs) for f in lh5_file]
+        return read(
+            name,
+            h5f,
+            start_row,
+            n_rows,
+            idx,
+            use_h5idx,
+            field_mask,
+            obj_buf,
+            obj_buf_start,
+            decompress,
         )
     def write(

legend-pydataobj 1.11.7__py3-none-any.whl → 1.12.0a2__py3-none-any.whl

legend-pydataobj 1.11.7py3-none-any.whl → 1.12.0a2py3-none-any.whl