legend-pydataobj 1.9.0__py3-none-any.whl → 1.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lgdo/lh5/core.py CHANGED
@@ -1,15 +1,18 @@
  from __future__ import annotations

+ import bisect
  import inspect
  import sys
  from collections.abc import Mapping, Sequence
  from typing import Any

  import h5py
+ import numpy as np
  from numpy.typing import ArrayLike

  from .. import types
  from . import _serializers
+ from .utils import read_n_rows


  def read(
@@ -23,6 +26,7 @@ def read(
      obj_buf: types.LGDO = None,
      obj_buf_start: int = 0,
      decompress: bool = True,
+     locking: bool = False,
  ) -> types.LGDO | tuple[types.LGDO, int]:
      """Read LH5 object data from a file.

@@ -97,6 +101,8 @@ def read(
          Decompress data encoded with LGDO's compression routines right
          after reading. The option has no effect on data encoded with HDF5
          built-in filters, which is always decompressed upstream by HDF5.
+     locking
+         Lock HDF5 file while reading

      Returns
      -------
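A minimal usage sketch of the new keyword (not part of the diff; it assumes `read` is exposed as `lgdo.lh5.read`, and the file and object names are placeholders):

    from lgdo import lh5

    # locking=False (the new default) opens the file with HDF5 file locking
    # disabled, which avoids lock errors on filesystems without lock support
    obj = lh5.read("geds/raw", "data.lh5", locking=False)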
@@ -110,17 +116,72 @@ def read(
      if isinstance(lh5_file, h5py.File):
          lh5_obj = lh5_file[name]
      elif isinstance(lh5_file, str):
-         lh5_file = h5py.File(lh5_file, mode="r")
+         lh5_file = h5py.File(lh5_file, mode="r", locking=locking)
          lh5_obj = lh5_file[name]
      else:
-         lh5_obj = []
-         for h5f in lh5_file:
-             if isinstance(h5f, str):
-                 h5f = h5py.File(h5f, mode="r")  # noqa: PLW2901
-             lh5_obj += [h5f[name]]
+         lh5_files = list(lh5_file)
+         n_rows_read = 0
+         obj_buf_is_new = False
+
+         for i, h5f in enumerate(lh5_files):
+             if (
+                 isinstance(idx, (list, tuple))
+                 and len(idx) > 0
+                 and not np.isscalar(idx[0])
+             ):
+                 # a list of lists: must be one per file
+                 idx_i = idx[i]
+             elif idx is not None:
+                 # make idx a proper tuple if it's not one already
+                 if not (isinstance(idx, tuple) and len(idx) == 1):
+                     idx = (idx,)
+                 # idx is a long continuous array
+                 n_rows_i = read_n_rows(name, h5f)
+                 # find the length of the subset of idx that contains indices
+                 # that are less than n_rows_i
+                 n_rows_to_read_i = bisect.bisect_left(idx[0], n_rows_i)
+                 # now split idx into idx_i and the remainder
+                 idx_i = np.array(idx[0])[:n_rows_to_read_i]
+                 idx = np.array(idx[0])[n_rows_to_read_i:] - n_rows_i
+             else:
+                 idx_i = None
+             n_rows_i = n_rows - n_rows_read
+
+             obj_ret = read(
+                 name,
+                 h5f,
+                 start_row,
+                 n_rows_i,
+                 idx_i,
+                 use_h5idx,
+                 field_mask,
+                 obj_buf,
+                 obj_buf_start,
+                 decompress,
+             )
+             if isinstance(obj_ret, tuple):
+                 obj_buf, n_rows_read_i = obj_ret
+                 obj_buf_is_new = True
+             else:
+                 obj_buf = obj_ret
+                 n_rows_read_i = len(obj_buf)
+
+             n_rows_read += n_rows_read_i
+             if n_rows_read >= n_rows or obj_buf is None:
+                 return obj_buf, n_rows_read
+             start_row = 0
+             obj_buf_start += n_rows_read_i
+         return obj_buf if obj_buf_is_new else (obj_buf, n_rows_read)
+
+     if isinstance(idx, (list, tuple)) and len(idx) > 0 and not np.isscalar(idx[0]):
+         idx = idx[0]
+     if isinstance(idx, np.ndarray) and idx.dtype == np.dtype("?"):
+         idx = np.where(idx)[0]

      obj, n_rows_read = _serializers._h5_read_lgdo(
-         lh5_obj,
+         lh5_obj.id,
+         lh5_obj.file.filename,
+         lh5_obj.name,
          start_row=start_row,
          n_rows=n_rows,
          idx=idx,
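The new per-file handling above is what lets `read` accept a sequence of files directly. It assumes `idx` is a sorted array of global row indices: for each file, `bisect.bisect_left` counts how many of the remaining indices fall below that file's row count (from `read_n_rows`), those are read from the current file, and the remainder is shifted down by the file's length before moving to the next file. A self-contained sketch of the same splitting logic, with made-up row counts and indices:

    import bisect

    import numpy as np

    rows_per_file = [10, 5, 8]          # hypothetical per-file row counts
    idx = np.array([2, 7, 11, 12, 20])  # sorted global row indices to read

    for n_rows_i in rows_per_file:
        # number of remaining indices that live in the current file
        n_take = bisect.bisect_left(idx, n_rows_i)
        idx_i, idx = idx[:n_take], idx[n_take:] - n_rows_i
        print(idx_i)  # -> [2 7], then [1 2], then [5]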
@@ -143,6 +204,7 @@ def write(
      n_rows: int | None = None,
      wo_mode: str = "append",
      write_start: int = 0,
+     page_buffer: int = 0,
      **h5py_kwargs,
  ) -> None:
      """Write an LGDO into an LH5 file.
@@ -218,6 +280,11 @@ def write(
      write_start
          row in the output file (if already existing) to start overwriting
          from.
+     page_buffer
+         enable paged aggregation with a buffer of this size in bytes.
+         Only used when creating a new file. Useful when writing a file
+         with a large number of small datasets. This is a shorthand for
+         ``fs_strategy="page", fs_page_size=page_buffer``.
      **h5py_kwargs
          additional keyword arguments forwarded to
          :meth:`h5py.Group.create_dataset` to specify, for example, an HDF5
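A hypothetical call exercising the new keyword (assuming the top-level `lgdo.lh5.write` and `lgdo.types.Array` APIs; the output path and data are placeholders):

    import numpy as np

    from lgdo import lh5
    from lgdo.types import Array

    data = Array(np.arange(1000, dtype="float32"))
    # page aggregation only applies when the file is created, so use a
    # file-creating write mode together with page_buffer
    lh5.write(data, "data", "out.lh5", wo_mode="overwrite_file", page_buffer=65536)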
@@ -225,6 +292,13 @@ def write(
          datasets. **Note: `compression` is ignored if compression is specified
          as an `obj` attribute.**
      """
+     if wo_mode in ("w", "write", "of", "overwrite_file"):
+         h5py_kwargs.update(
+             {
+                 "fs_strategy": "page",
+                 "fs_page_size": page_buffer,
+             }
+         )
      return _serializers._h5_write_lgdo(
          obj,
          name,
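The branch above only pre-populates `h5py_kwargs`; assuming those two keys eventually reach the `h5py.File` constructor when the output file is created (an assumption, since the forwarding happens in `_h5_write_lgdo`, which this diff does not show), the effect at the HDF5 level would be roughly:

    import h5py

    # rough equivalent of page_buffer=65536 on a newly created file
    with h5py.File("out.lh5", "w", fs_strategy="page", fs_page_size=65536) as f:
        pass  # datasets written here share page-aligned file-space allocations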
lgdo/lh5/exceptions.py CHANGED
@@ -4,11 +4,11 @@ import h5py
  
  
  class LH5DecodeError(Exception):
-     def __init__(self, message: str, obj: h5py.Dataset | h5py.Group) -> None:
+     def __init__(self, message: str, fname: str, oname: str) -> None:
          super().__init__(message)

-         self.file = obj.file.filename
-         self.obj = obj.name
+         self.file = fname
+         self.obj = oname

      def __str__(self) -> str:
          return (
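With the new signature the exception carries the file and object names as plain strings, so it can be constructed without a live `h5py` handle. A small sketch (names are placeholders):

    from lgdo.lh5.exceptions import LH5DecodeError

    err = LH5DecodeError("unsupported datatype", "data.lh5", "/geds/raw")
    print(err.file, err.obj)  # -> data.lh5 /geds/raw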