legend-pydataobj 1.11.12__py3-none-any.whl → 1.12.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- {legend_pydataobj-1.11.12.dist-info → legend_pydataobj-1.12.0.dist-info}/METADATA +1 -1
- {legend_pydataobj-1.11.12.dist-info → legend_pydataobj-1.12.0.dist-info}/RECORD +27 -27
- {legend_pydataobj-1.11.12.dist-info → legend_pydataobj-1.12.0.dist-info}/WHEEL +1 -1
- lgdo/__init__.py +5 -4
- lgdo/_version.py +2 -2
- lgdo/lh5/__init__.py +3 -5
- lgdo/lh5/_serializers/read/composite.py +1 -3
- lgdo/lh5/_serializers/write/array.py +2 -3
- lgdo/lh5/_serializers/write/composite.py +2 -2
- lgdo/lh5/concat.py +3 -9
- lgdo/lh5/core.py +33 -32
- lgdo/lh5/iterator.py +48 -27
- lgdo/lh5/settings.py +34 -0
- lgdo/lh5/store.py +22 -75
- lgdo/lh5/tools.py +0 -111
- lgdo/lh5/utils.py +6 -4
- lgdo/types/array.py +84 -15
- lgdo/types/encoded.py +25 -20
- lgdo/types/histogram.py +1 -1
- lgdo/types/lgdo.py +50 -0
- lgdo/types/table.py +50 -28
- lgdo/types/vectorofvectors.py +132 -94
- lgdo/types/vovutils.py +14 -4
- lgdo/types/waveformtable.py +19 -21
- lgdo/lh5_store.py +0 -284
- {legend_pydataobj-1.11.12.dist-info → legend_pydataobj-1.12.0.dist-info}/entry_points.txt +0 -0
- {legend_pydataobj-1.11.12.dist-info → legend_pydataobj-1.12.0.dist-info}/licenses/LICENSE +0 -0
- {legend_pydataobj-1.11.12.dist-info → legend_pydataobj-1.12.0.dist-info}/top_level.txt +0 -0
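
The headline user-facing changes: lh5.read() now returns just the LGDO object instead of an (object, n_rows_read) tuple, LH5Iterator yields its buffer directly and gains i_start/n_entries arguments, the HDF5 write defaults move to a new lgdo.lh5.settings module, and the legacy lgdo.lh5_store module is removed. A minimal sketch of the new-style API (the file and group names are placeholders, not from the diff):

    from lgdo import lh5

    # read() now returns only the object; its length is the number of rows read
    tbl = lh5.read("geds/data", "file.lh5")

    # the iterator yields the filled buffer itself
    for chunk in lh5.LH5Iterator("file.lh5", "geds/data"):
        print(len(chunk))
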
{legend_pydataobj-1.11.12.dist-info → legend_pydataobj-1.12.0.dist-info}/RECORD
CHANGED
@@ -1,9 +1,8 @@
-legend_pydataobj-1.
-lgdo/__init__.py,sha256=
-lgdo/_version.py,sha256=
+legend_pydataobj-1.12.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+lgdo/__init__.py,sha256=fkRv79kdtBasw31gPVK9SdLQ2vEEajTV2t3UPDvFg9o,3206
+lgdo/_version.py,sha256=X7AXkrxMLYa0fUCdwZA2oOfiFkQJiuenTXzRghkc4eU,513
 lgdo/cli.py,sha256=s_EWTBWW76l7zWb6gaTSTjiT-0RzzcYEmjeFEQCVxfk,4647
 lgdo/lgdo_utils.py,sha256=6a2YWEwpyEMXlAyTHZMO01aqxy6SxJzPZkGNWKNWuS0,2567
-lgdo/lh5_store.py,sha256=5BzbJA9sLcqjp8bJDc2olwOiw0VS6rmfg3cfh1kQkRY,8512
 lgdo/logging.py,sha256=82wIOj7l7xr3WYyeHdpSXbbjzHJsy-uRyKYUYx2vMfQ,1003
 lgdo/units.py,sha256=VQYME86_ev9S7Fq8RyCOQNqYr29MphTTYemmEouZafk,161
 lgdo/utils.py,sha256=WRTmXnaQ-h2hVxwJ27qiOigdsD3DHcaDrdDjvupCuZU,3940
@@ -13,44 +12,45 @@ lgdo/compression/generic.py,sha256=tF3UhLJbUDcovLxpIzgQRxFSjZ5Fz3uDRy9kI4mFntQ,2
 lgdo/compression/radware.py,sha256=GcNTtjuyL7VBBqziUBmSqNXuhqy1bJJgvcyvyumPtrc,23839
 lgdo/compression/utils.py,sha256=W2RkBrxPpXlat84dnU9Ad7d_tTws0irtGl7O1dNWjnk,1140
 lgdo/compression/varlen.py,sha256=6ZZUItyoOfygDdE0DyoISeFZfqdbH6xl7T0eclfarzg,15127
-lgdo/lh5/__init__.py,sha256=
-lgdo/lh5/concat.py,sha256=
-lgdo/lh5/core.py,sha256=
+lgdo/lh5/__init__.py,sha256=smHTawINIiogHNfYJq3aPvtxleTnBMdPADRCdc1wea8,748
+lgdo/lh5/concat.py,sha256=BZCgK7TWPKK8fMmha8K83d3bC31FVO1b5LOW7x-Ru1s,6186
+lgdo/lh5/core.py,sha256=U0ZZk6EmojRRYFBEo_bMy7jZ3SKBU41MIsSulyFxZIU,13752
 lgdo/lh5/datatype.py,sha256=ry3twFaosuBoskiTKqtBYRMk9PQAf403593xKaItfog,1827
 lgdo/lh5/exceptions.py,sha256=3kj8avXl4eBGvebl3LG12gJEmw91W0T8PYR0AfvUAyM,1211
-lgdo/lh5/iterator.py,sha256=
-lgdo/lh5/
-lgdo/lh5/
-lgdo/lh5/
+lgdo/lh5/iterator.py,sha256=1ob9B7Bf3ioGCtZkUZoL6ibTxAwLf4ld8_33ghVVEa4,20498
+lgdo/lh5/settings.py,sha256=cmPd6ZvneAF5sFMA1qf-9g_YSSygJcQSRmZDp1_sBEU,1001
+lgdo/lh5/store.py,sha256=qkBm3gPbr1R2UlQpUuDR5sGRMzpYJBWFL8fDIry6tmQ,8474
+lgdo/lh5/tools.py,sha256=drtJWHY82wCFuFr6LVVnm2AQgs_wZuFmAvyOB4tcOHs,6431
+lgdo/lh5/utils.py,sha256=f2H7H1D-RfDN3g_YrVDQEPaHevn5yDJFA-uznK9cgx8,6336
 lgdo/lh5/_serializers/__init__.py,sha256=eZzxMp1SeZWG0PkEXUiCz3XyprQ8EmelHUmJogC8xYE,1263
 lgdo/lh5/_serializers/read/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lgdo/lh5/_serializers/read/array.py,sha256=uWfMCihfAmW2DE2ewip2qCK_kvQC_mb2zvOv26uzijc,1000
-lgdo/lh5/_serializers/read/composite.py,sha256=
+lgdo/lh5/_serializers/read/composite.py,sha256=UvkZHEhf0V7SFLxzF52eyP68hU0guGOLqosrfmIfeys,11729
 lgdo/lh5/_serializers/read/encoded.py,sha256=Q98c08d8LkZq2AlY4rThYECVaEqwbv4T2Urn7TGnsyE,4130
 lgdo/lh5/_serializers/read/ndarray.py,sha256=lFCXD6bSzmMOH7cVmvRYXakkfMCI8EoqTPNONRJ1F0s,3690
 lgdo/lh5/_serializers/read/scalar.py,sha256=kwhWm1T91pXf86CqtUUD8_qheSR92gXZrQVtssV5YCg,922
 lgdo/lh5/_serializers/read/utils.py,sha256=YfSqPO-83A1XvhhuULxQ0Qz2A5ODa3sb7ApNxQVJXd0,7581
 lgdo/lh5/_serializers/read/vector_of_vectors.py,sha256=765P8mElGArAaEPkHTAUXFQ47t1_3-3BQAete0LckBQ,7207
 lgdo/lh5/_serializers/write/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-lgdo/lh5/_serializers/write/array.py,sha256=
-lgdo/lh5/_serializers/write/composite.py,sha256=
+lgdo/lh5/_serializers/write/array.py,sha256=gAB1EjPa9YojPqohVwY_VPeuY7_bLN-lttCmfgty-qk,3175
+lgdo/lh5/_serializers/write/composite.py,sha256=eX5an6YZ5I7zf1z90mfzKYYJQoS-ux10rjDrUdevW6Y,10025
 lgdo/lh5/_serializers/write/scalar.py,sha256=JPt_fcdTKOSFp5hfJdcKIfK4hxhcD8vhOlvDF-7btQ8,763
 lgdo/lh5/_serializers/write/vector_of_vectors.py,sha256=puGQX9XF5P_5DVbm_Cc6TvPrsDywgBLSYtkqFNltbB4,3493
 lgdo/types/__init__.py,sha256=DNfOErPiAZg-7Gygkp6ZKAi20Yrm1mfderZHvKo1Y4s,821
-lgdo/types/array.py,sha256=
+lgdo/types/array.py,sha256=vxViJScqKw4zGUrrIOuuU_9Y0oTfOkEEhs0TOyUYjwI,9284
 lgdo/types/arrayofequalsizedarrays.py,sha256=DOGJiTmc1QCdm7vLbE6uIRXoMPtt8uuCfmwQawgWf5s,4949
-lgdo/types/encoded.py,sha256=
+lgdo/types/encoded.py,sha256=_e8u_BPfpjJbLnEdyTo9QG3kbNsGj0BN4gjdj3L1ndw,15640
 lgdo/types/fixedsizearray.py,sha256=7RjUwTz1bW0pcrdy27JlfrXPAuOU89Kj7pOuSUCojK8,1527
-lgdo/types/histogram.py,sha256=
-lgdo/types/lgdo.py,sha256=
+lgdo/types/histogram.py,sha256=Jz1lLH56BfYnmcUhxUHK1h2wLDQ0Abgyd-6LznU-3-k,19979
+lgdo/types/lgdo.py,sha256=21YNtJCHnSO3M60rjsAdbMO5crDjL_0BtuFpudZ2xvU,4500
 lgdo/types/scalar.py,sha256=c5Es2vyDqyWTPV6mujzfIzMpC1jNWkEIcvYyWQUxH3Q,1933
 lgdo/types/struct.py,sha256=m3pYfGfKptV8ti3wb4n1nsPKMvhjdWCFoRdR5YooZBM,6353
-lgdo/types/table.py,sha256=
-lgdo/types/vectorofvectors.py,sha256=
-lgdo/types/vovutils.py,sha256=
-lgdo/types/waveformtable.py,sha256=
-legend_pydataobj-1.
-legend_pydataobj-1.
-legend_pydataobj-1.
-legend_pydataobj-1.
-legend_pydataobj-1.
+lgdo/types/table.py,sha256=hvOwhFkm-_CkNhGmD8SJoeepZcwFY6ItYOS76LztKtA,20158
+lgdo/types/vectorofvectors.py,sha256=GbAKV_ehXN4XdWSwnmKS_ErCiudRetcH_3wo7iDrVjw,26854
+lgdo/types/vovutils.py,sha256=LW3ZcwECxVYxxcFadAtY3nnK-9-rk8Xbg_m8hY30lo4,10708
+lgdo/types/waveformtable.py,sha256=9S_NMg894NZTGt2pLuskwH4-zQ5EbLnzWI6FVui6fXE,9827
+legend_pydataobj-1.12.0.dist-info/METADATA,sha256=76785CT-1QRlVf6WOFnbnRWUiC6zSUnMxFR2km15kQ4,44443
+legend_pydataobj-1.12.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+legend_pydataobj-1.12.0.dist-info/entry_points.txt,sha256=0KWfnwbuwhNn0vPUqARukjp04Ca6lzfZBSirouRmk7I,76
+legend_pydataobj-1.12.0.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
+legend_pydataobj-1.12.0.dist-info/RECORD,,
lgdo/__init__.py
CHANGED
@@ -45,7 +45,7 @@ browsed easily in python like any `HDF5 <https://www.hdfgroup.org>`_ file using
 from __future__ import annotations
 
 from ._version import version as __version__
-from .
+from .lh5 import LH5Iterator, ls, read, read_as, read_n_rows, show, write
 from .types import (
     LGDO,
     Array,
@@ -69,7 +69,6 @@ __all__ = [
     "FixedSizeArray",
     "Histogram",
     "LH5Iterator",
-    "LH5Store",
     "Scalar",
     "Struct",
     "Table",
@@ -77,8 +76,10 @@ __all__ = [
     "VectorOfVectors",
     "WaveformTable",
     "__version__",
-    "load_dfs",
-    "load_nda",
     "ls",
+    "read",
+    "read_as",
+    "read_n_rows",
     "show",
+    "write",
 ]
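
The top-level namespace now re-exports the functional lh5 API (read, read_as, read_n_rows, write), while LH5Store, load_dfs and load_nda disappear from lgdo.__all__ (LH5Store remains importable from lgdo.lh5). A hedged migration sketch with placeholder names:

    # 1.11.x style, no longer exported at the top level:
    #     from lgdo import LH5Store
    #     obj, n_rows = LH5Store().read("geds/data", "file.lh5")

    # 1.12.0 style:
    from lgdo import read, write

    obj = read("geds/data", "file.lh5")
    write(obj, "data", "out.lh5", group="geds")
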
lgdo/_version.py
CHANGED
lgdo/lh5/__init__.py
CHANGED
@@ -7,24 +7,22 @@ browsed easily in python like any `HDF5 <https://www.hdfgroup.org>`_ file using
 
 from __future__ import annotations
 
-from ._serializers.write.array import DEFAULT_HDF5_SETTINGS
 from .core import read, read_as, write
 from .iterator import LH5Iterator
 from .store import LH5Store
-from .tools import
+from .tools import ls, show
 from .utils import read_n_rows
 
 __all__ = [
-    "DEFAULT_HDF5_SETTINGS",
     "LH5Iterator",
     "LH5Store",
     "concat",
-    "
-    "load_nda",
+    "default_hdf5_settings",
     "ls",
     "read",
     "read_as",
     "read_n_rows",
+    "reset_default_hdf5_settings",
     "show",
     "write",
 ]
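
DEFAULT_HDF5_SETTINGS is no longer re-exported here (it now lives in the new lgdo.lh5.settings module, shown at the end of this diff) and the load_* helpers are gone; default_hdf5_settings and reset_default_hdf5_settings are exported instead. A sketch of tuning write defaults under the new layout, using only names visible in this diff:

    from lgdo.lh5 import settings

    # the defaults moved out of the write serializer into their own module
    settings.DEFAULT_HDF5_SETTINGS["compression"] = "lzf"

    # rebuild the packaged defaults (shuffle + gzip)
    settings.DEFAULT_HDF5_SETTINGS = settings.default_hdf5_settings()
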
lgdo/lh5/_serializers/read/composite.py
CHANGED
@@ -353,15 +353,13 @@ def _h5_read_table(
         table = Table(col_dict=col_dict, attrs=attrs)
 
         # set (write) loc to end of tree
-        table.
+        table.resize(do_warn=True)
         return table, n_rows_read
 
     # We have read all fields into the object buffer. Run
     # checks: All columns should be the same size. So update
     # table's size as necessary, warn if any mismatches are found
     obj_buf.resize(do_warn=True)
-    # set (write) loc to end of tree
-    obj_buf.loc = obj_buf_start + n_rows_read
 
     # check attributes
     utils.check_obj_buf_attrs(obj_buf.attrs, attrs, fname, oname)
lgdo/lh5/_serializers/write/array.py
CHANGED
@@ -6,12 +6,11 @@ import h5py
 import numpy as np
 
 from .... import types
+from ... import settings
 from ...exceptions import LH5EncodeError
 
 log = logging.getLogger(__name__)
 
-DEFAULT_HDF5_SETTINGS: dict[str, ...] = {"shuffle": True, "compression": "gzip"}
-
 
 def _h5_write_array(
     obj,
@@ -49,7 +48,7 @@ def _h5_write_array(
         del group[name]
 
     # set default compression options
-    for k, v in DEFAULT_HDF5_SETTINGS.items():
+    for k, v in settings.DEFAULT_HDF5_SETTINGS.items():
         h5py_kwargs.setdefault(k, v)
 
     # compress using the 'compression' LGDO attribute, if available
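
Since the serializer fills h5py_kwargs via setdefault, explicit per-call keyword arguments still override the global defaults; looking the dictionary up through the settings module (rather than importing a local constant) is what makes runtime rebinding of DEFAULT_HDF5_SETTINGS visible here. A standalone illustration of the merge rule:

    DEFAULT_HDF5_SETTINGS = {"shuffle": True, "compression": "gzip"}

    # mimics the loop in _h5_write_array: only missing keys are filled in
    h5py_kwargs = {"compression": "lzf"}
    for k, v in DEFAULT_HDF5_SETTINGS.items():
        h5py_kwargs.setdefault(k, v)

    print(h5py_kwargs)  # {'compression': 'lzf', 'shuffle': True}
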
lgdo/lh5/_serializers/write/composite.py
CHANGED
@@ -1,8 +1,8 @@
 from __future__ import annotations
 
 import logging
-import os
 from inspect import signature
+from pathlib import Path
 
 import h5py
 
@@ -53,7 +53,7 @@ def _h5_write_lgdo(
     # change any object in the file. So we use file:append for
     # write_object:overwrite.
     if not isinstance(lh5_file, h5py.File):
-        mode = "w" if wo_mode == "of" or not
+        mode = "w" if wo_mode == "of" or not Path(lh5_file).exists() else "a"
         lh5_file = h5py.File(lh5_file, mode=mode, **file_kwargs)
 
     log.debug(
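
The os-based existence check becomes pathlib, with the mode logic unchanged: wo_mode "of" or a not-yet-existing file opens with "w" (truncate), anything else appends. Reproduced in isolation:

    from pathlib import Path

    def h5_file_mode(lh5_file: str, wo_mode: str) -> str:
        # mirrors _h5_write_lgdo: truncate for "of" or brand-new files, else append
        return "w" if wo_mode == "of" or not Path(lh5_file).exists() else "a"

    print(h5_file_mode("fresh.lh5", "append"))  # "w" unless fresh.lh5 already exists
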
lgdo/lh5/concat.py
CHANGED
@@ -76,7 +76,7 @@ def _get_lgdos(file, obj_list):
             continue
 
         # read as little as possible
-        obj
+        obj = store.read(current, h5f0, n_rows=1)
         if isinstance(obj, (Table, Array, VectorOfVectors)):
             lgdos.append(current)
 
@@ -139,12 +139,6 @@ def _remove_nested_fields(lgdos: dict, obj_list: list):
         _inplace_table_filter(key, val, obj_list)
 
 
-def _slice(obj, n_rows):
-    ak_obj = obj.view_as("ak")[:n_rows]
-    obj_type = type(obj)
-    return obj_type(ak_obj)
-
-
 def lh5concat(
     lh5_files: list,
     output: str,
@@ -186,8 +180,8 @@ def lh5concat(
     # loop over lgdo objects
     for lgdo in lgdos:
         # iterate over the files
-        for lh5_obj
-        data = {lgdo:
+        for lh5_obj in LH5Iterator(lh5_files, lgdo):
+            data = {lgdo: lh5_obj}
 
             # remove the nested fields
             _remove_nested_fields(data, obj_list)
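
lh5concat now streams each object through LH5Iterator instead of reading row counts and slicing by hand, which is why the private _slice helper is deleted. The same chunked pattern works in user code; a sketch with hypothetical file names:

    from lgdo.lh5 import LH5Iterator

    files = ["run1.lh5", "run2.lh5"]  # placeholders

    # each iteration yields the next chunk of "geds/data",
    # transparently crossing file boundaries
    for lh5_obj in LH5Iterator(files, "geds/data"):
        data = {"geds/data": lh5_obj}
        # ... filter and write the chunk
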
lgdo/lh5/core.py
CHANGED
@@ -4,6 +4,8 @@ import bisect
 import inspect
 import sys
 from collections.abc import Mapping, Sequence
+from contextlib import suppress
+from pathlib import Path
 from typing import Any
 
 import h5py
@@ -92,8 +94,7 @@ def read(
         will be set to ``True``, while the rest will default to ``False``.
     obj_buf
         Read directly into memory provided in `obj_buf`. Note: the buffer
-        will be
-        buffer length, send in ``n_rows = len(obj_buf)``.
+        will be resized to accommodate the data retrieved.
     obj_buf_start
         Start location in ``obj_buf`` for read. For concatenating data to
         array-like objects.
@@ -106,25 +107,25 @@ def read(
 
     Returns
     -------
-
-
-        successfully read out. Essential for arrays when the amount of data
-        is smaller than the object buffer. For scalars and structs
-        `n_rows_read` will be``1``. For tables it is redundant with
-        ``table.loc``. If `obj_buf` is ``None``, only `object` is returned.
+    object
+        the read-out object
     """
     if isinstance(lh5_file, h5py.File):
         lh5_obj = lh5_file[name]
     elif isinstance(lh5_file, str):
         lh5_file = h5py.File(lh5_file, mode="r", locking=locking)
-
+        try:
+            lh5_obj = lh5_file[name]
+        except KeyError as ke:
+            err = f"Object {name} not found in file {lh5_file.filename}"
+            raise KeyError(err) from ke
     else:
-
-
-
-
+        if obj_buf is not None:
+            obj_buf.resize(obj_buf_start)
+        else:
+            obj_buf_start = 0
 
-        for i, h5f in enumerate(
+        for i, h5f in enumerate(lh5_file):
             if (
                 isinstance(idx, (list, tuple))
                 and len(idx) > 0
@@ -146,33 +147,26 @@ def read(
                 idx = np.array(idx[0])[n_rows_to_read_i:] - n_rows_i
             else:
                 idx_i = None
-                n_rows_i = n_rows - n_rows_read
 
-
+            obj_buf_start_i = len(obj_buf) if obj_buf else 0
+            n_rows_i = n_rows - (obj_buf_start_i - obj_buf_start)
+
+            obj_buf = read(
                 name,
                 h5f,
-                start_row,
+                start_row if i == 0 else 0,
                 n_rows_i,
                 idx_i,
                 use_h5idx,
                 field_mask,
                 obj_buf,
-
+                obj_buf_start_i,
                 decompress,
             )
-            if isinstance(obj_ret, tuple):
-                obj_buf, n_rows_read_i = obj_ret
-                obj_buf_is_new = True
-            else:
-                obj_buf = obj_ret
-                n_rows_read_i = len(obj_buf)
 
-
-
-
-            start_row = 0
-            obj_buf_start += n_rows_read_i
-        return obj_buf if obj_buf_is_new else (obj_buf, n_rows_read)
+            if obj_buf is None or (len(obj_buf) - obj_buf_start) >= n_rows:
+                return obj_buf
+        return obj_buf
 
     if isinstance(idx, (list, tuple)) and len(idx) > 0 and not np.isscalar(idx[0]):
         idx = idx[0]
@@ -192,8 +186,10 @@ def read(
         obj_buf_start=obj_buf_start,
         decompress=decompress,
     )
+    with suppress(AttributeError):
+        obj.resize(obj_buf_start + n_rows_read)
 
-    return obj
+    return obj
 
 
 def write(
@@ -295,7 +291,12 @@ def write(
     datasets. **Note: `compression` Ignored if compression is specified
     as an `obj` attribute.**
     """
-
+
+    if (
+        isinstance(lh5_file, str)
+        and not Path(lh5_file).is_file()
+        and wo_mode in ("w", "write_safe", "of", "overwrite_file")
+    ):
         h5py_kwargs.update(
             {
                 "fs_strategy": "page",
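
The visible contract of read() changes here: the (object, n_rows_read) tuple is gone, the returned LGDO is resized so len() already reflects what was read, and a missing name now raises a KeyError naming the file. Adapting caller code is mechanical; a hedged sketch with placeholder names:

    from lgdo import lh5

    # 1.11.x: obj, n_rows_read = lh5.read("geds/data", "file.lh5", obj_buf=buf)
    # 1.12.0: the object itself carries its length
    obj = lh5.read("geds/data", "file.lh5")
    n_rows_read = len(obj)
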
lgdo/lh5/iterator.py
CHANGED
@@ -24,7 +24,8 @@ class LH5Iterator(typing.Iterator):
 
     This can be used as an iterator:
 
-
+
+    >>> for lh5_obj in LH5Iterator(...):
     >>>     # do the thing!
 
     This is intended for if you are reading a large quantity of data. This
@@ -42,6 +43,8 @@ class LH5Iterator(typing.Iterator):
     In addition to accessing requested data via ``lh5_obj``, several
     properties exist to tell you where that data came from:
 
+    - lh5_it.current_i_entry: get the index within the entry list of the
+      first entry that is currently read
     - lh5_it.current_local_entries: get the entry numbers relative to the
       file the data came from
     - lh5_it.current_global_entries: get the entry number relative to the
@@ -49,9 +52,9 @@ class LH5Iterator(typing.Iterator):
     - lh5_it.current_files: get the file name corresponding to each entry
     - lh5_it.current_groups: get the group name corresponding to each entry
 
-    This class can also be used
+    This class can also be used for random access:
 
-    >>> lh5_obj
+    >>> lh5_obj = lh5_it.read(i_entry)
 
     to read the block of entries starting at i_entry. In case of multiple files
     or the use of an event selection, i_entry refers to a global event index
@@ -65,6 +68,8 @@ class LH5Iterator(typing.Iterator):
         base_path: str = "",
         entry_list: list[int] | list[list[int]] | None = None,
         entry_mask: list[bool] | list[list[bool]] | None = None,
+        i_start: int = 0,
+        n_entries: int | None = None,
         field_mask: dict[str, bool] | list[str] | tuple[str] | None = None,
         buffer_len: int = "100*MB",
         file_cache: int = 10,
@@ -89,6 +94,10 @@ class LH5Iterator(typing.Iterator):
         entry_mask
             mask of entries to read. If a list of arrays is provided, expect
             one for each file. Ignore if a selection list is provided.
+        i_start
+            index of first entry to start at when iterating
+        n_entries
+            number of entries to read before terminating iteration
         field_mask
             mask of which fields to read. See :meth:`LH5Store.read` for
             more details.
@@ -183,7 +192,8 @@ class LH5Iterator(typing.Iterator):
             msg = f"can't open any files from {lh5_files}"
             raise RuntimeError(msg)
 
-        self.
+        self.i_start = i_start
+        self.n_entries = n_entries
         self.current_i_entry = 0
         self.next_i_entry = 0
 
@@ -317,14 +327,21 @@ class LH5Iterator(typing.Iterator):
             )
         return self.global_entry_list
 
-    def read(self, i_entry: int) ->
-        "
-
-
-
+    def read(self, i_entry: int, n_entries: int | None = None) -> LGDO:
+        "Read the nextlocal chunk of events, starting at entry."
+        self.lh5_buffer.resize(0)
+
+        if n_entries is None:
+            n_entries = self.buffer_len
+        elif n_entries == 0:
+            return self.lh5_buffer
+        elif n_entries > self.buffer_len:
+            msg = "n_entries cannot be larger than buffer_len"
+            raise ValueError(msg)
 
         # if file hasn't been opened yet, search through files
         # sequentially until we find the right one
+        i_file = np.searchsorted(self.entry_map, i_entry, "right")
         if i_file < len(self.lh5_files) and self.entry_map[i_file] == np.iinfo("q").max:
             while i_file < len(self.lh5_files) and i_entry >= self._get_file_cumentries(
                 i_file
@@ -332,10 +349,10 @@ class LH5Iterator(typing.Iterator):
             i_file += 1
 
         if i_file == len(self.lh5_files):
-            return
+            return self.lh5_buffer
         local_i_entry = i_entry - self._get_file_cumentries(i_file - 1)
 
-        while self.
+        while len(self.lh5_buffer) < n_entries and i_file < len(self.file_map):
            # Loop through files
            local_idx = self.get_file_entrylist(i_file)
            if local_idx is not None and len(local_idx) == 0:
@@ -344,18 +361,17 @@ class LH5Iterator(typing.Iterator):
                continue
 
            i_local = local_i_entry if local_idx is None else local_idx[local_i_entry]
-           self.lh5_buffer
+           self.lh5_buffer = self.lh5_st.read(
                self.groups[i_file],
                self.lh5_files[i_file],
                start_row=i_local,
-               n_rows=
+               n_rows=n_entries - len(self.lh5_buffer),
                idx=local_idx,
                field_mask=self.field_mask,
                obj_buf=self.lh5_buffer,
-               obj_buf_start=self.
+               obj_buf_start=len(self.lh5_buffer),
            )
 
-           self.n_rows += n_rows
           i_file += 1
           local_i_entry = 0
 
@@ -364,7 +380,7 @@ class LH5Iterator(typing.Iterator):
        if self.friend is not None:
            self.friend.read(i_entry)
 
-       return
+       return self.lh5_buffer
 
    def reset_field_mask(self, mask):
        """Replaces the field mask of this iterator and any friends with mask"""
@@ -375,7 +391,7 @@ class LH5Iterator(typing.Iterator):
    @property
    def current_local_entries(self) -> NDArray[int]:
        """Return list of local file entries in buffer"""
-       cur_entries = np.zeros(self.
+       cur_entries = np.zeros(len(self.lh5_buffer), dtype="int32")
        i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
        file_start = self._get_file_cumentries(i_file - 1)
        i_local = self.current_i_entry - file_start
@@ -402,7 +418,7 @@ class LH5Iterator(typing.Iterator):
    @property
    def current_global_entries(self) -> NDArray[int]:
        """Return list of local file entries in buffer"""
-       cur_entries = np.zeros(self.
+       cur_entries = np.zeros(len(self.lh5_buffer), dtype="int32")
        i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
        file_start = self._get_file_cumentries(i_file - 1)
        i_local = self.current_i_entry - file_start
@@ -433,7 +449,7 @@ class LH5Iterator(typing.Iterator):
    @property
    def current_files(self) -> NDArray[str]:
        """Return list of file names for entries in buffer"""
-       cur_files = np.zeros(self.
+       cur_files = np.zeros(len(self.lh5_buffer), dtype=object)
        i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
        file_start = self._get_file_cumentries(i_file - 1)
        i_local = self.current_i_entry - file_start
@@ -455,7 +471,7 @@ class LH5Iterator(typing.Iterator):
    @property
    def current_groups(self) -> NDArray[str]:
        """Return list of group names for entries in buffer"""
-       cur_groups = np.zeros(self.
+       cur_groups = np.zeros(len(self.lh5_buffer), dtype=object)
        i_file = np.searchsorted(self.entry_map, self.current_i_entry, "right")
        file_start = self._get_file_cumentries(i_file - 1)
        i_local = self.current_i_entry - file_start
@@ -485,14 +501,19 @@ class LH5Iterator(typing.Iterator):
    def __iter__(self) -> typing.Iterator:
        """Loop through entries in blocks of size buffer_len."""
        self.current_i_entry = 0
-       self.next_i_entry =
+       self.next_i_entry = self.i_start
        return self
 
    def __next__(self) -> tuple[LGDO, int, int]:
-       """Read next buffer_len entries and return lh5_table
-
-
-
-
+       """Read next buffer_len entries and return lh5_table and iterator entry."""
+       n_entries = self.n_entries
+       if n_entries is not None:
+           n_entries = min(
+               self.buffer_len, n_entries + self.i_start - self.next_i_entry
+           )
+
+       buf = self.read(self.next_i_entry, n_entries)
+       if len(buf) == 0:
            raise StopIteration
-
+       self.next_i_entry = self.current_i_entry + len(buf)
+       return buf
lgdo/lh5/settings.py
ADDED
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+from typing import Any
+
+
+def default_hdf5_settings() -> dict[str, Any]:
+    """Returns the HDF5 settings for writing data to disk to the pydataobj defaults.
+
+    Examples
+    --------
+    >>> from lgdo import lh5
+    >>> lh5.DEFAULT_HDF5_SETTINGS["compression"] = "lzf"
+    >>> lh5.write(data, "data", "file.lh5")  # compressed with LZF
+    >>> lh5.DEFAULT_HDF5_SETTINGS = lh5.default_hdf5_settings()
+    >>> lh5.write(data, "data", "file.lh5", "of")  # compressed with default settings (GZIP)
+    """
+
+    return {
+        "shuffle": True,
+        "compression": "gzip",
+    }
+
+
+DEFAULT_HDF5_SETTINGS: dict[str, ...] = default_hdf5_settings()
+"""Global dictionary storing the default HDF5 settings for writing data to disk.
+
+Modify this global variable before writing data to disk with this package.
+
+Examples
+--------
+>>> from lgdo import lh5
+>>> lh5.DEFAULT_HDF5_SETTINGS["compression"] = "lzf"
+>>> lh5.write(data, "data", "file.lh5")  # compressed with LZF
+"""