legend-pydataobj 1.10.0.tar.gz → 1.10.2.tar.gz
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/PKG-INFO +1 -1
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/pyproject.toml +2 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/legend_pydataobj.egg-info/PKG-INFO +1 -1
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/_version.py +2 -2
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/compression/radware.py +8 -16
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5/_serializers/read/composite.py +1 -2
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5/_serializers/read/ndarray.py +4 -5
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5/_serializers/read/scalar.py +2 -2
- legend_pydataobj-1.10.2/src/lgdo/lh5/_serializers/read/utils.py +174 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5/_serializers/read/vector_of_vectors.py +1 -1
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5/core.py +7 -0
- legend_pydataobj-1.10.2/src/lgdo/lh5/iterator.py +498 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5/store.py +16 -2
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5/utils.py +16 -42
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/types/histogram.py +104 -3
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/types/table.py +2 -2
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/types/vectorofvectors.py +2 -2
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/compression/conftest.py +1 -1
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/lh5/test_core.py +2 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/lh5/test_lh5_iterator.py +83 -2
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/lh5/test_lh5_store.py +87 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/lh5/test_lh5_write.py +1 -1
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/types/test_histogram.py +97 -1
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/types/test_vectorofvectors.py +1 -1
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/types/test_vovutils.py +1 -1
- legend_pydataobj-1.10.0/src/lgdo/lh5/_serializers/read/utils.py +0 -35
- legend_pydataobj-1.10.0/src/lgdo/lh5/iterator.py +0 -314
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/LICENSE +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/README.md +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/setup.cfg +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/legend_pydataobj.egg-info/SOURCES.txt +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/legend_pydataobj.egg-info/dependency_links.txt +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/legend_pydataobj.egg-info/entry_points.txt +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/legend_pydataobj.egg-info/not-zip-safe +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/legend_pydataobj.egg-info/requires.txt +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/legend_pydataobj.egg-info/top_level.txt +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/__init__.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/cli.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/compression/__init__.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/compression/base.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/compression/generic.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/compression/utils.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/compression/varlen.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lgdo_utils.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5/__init__.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5/_serializers/__init__.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5/_serializers/read/__init__.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5/_serializers/read/array.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5/_serializers/read/encoded.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5/_serializers/write/__init__.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5/_serializers/write/array.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5/_serializers/write/composite.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5/_serializers/write/scalar.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5/_serializers/write/vector_of_vectors.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5/datatype.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5/exceptions.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5/tools.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5_store.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/logging.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/types/__init__.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/types/array.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/types/arrayofequalsizedarrays.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/types/encoded.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/types/fixedsizearray.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/types/lgdo.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/types/scalar.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/types/struct.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/types/vovutils.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/types/waveformtable.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/units.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/utils.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/compression/sigcompress/LDQTA_r117_20200110T105115Z_cal_geds_raw-0.dat +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/compression/sigcompress/special-wf-clipped.dat +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/compression/test_compression.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/compression/test_radware_sigcompress.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/compression/test_str2wfcodec.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/compression/test_uleb128_zigzag_diff.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/conftest.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/lh5/conftest.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/lh5/test_lh5_datatype.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/lh5/test_lh5_tools.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/lh5/test_lh5_utils.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/test_cli.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/test_lgdo_utils.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/types/test_array.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/types/test_arrayofequalsizedarrays.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/types/test_encoded.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/types/test_fixedsizearray.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/types/test_representations.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/types/test_scalar.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/types/test_struct.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/types/test_table.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/types/test_table_eval.py +0 -0
- {legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/tests/types/test_waveformtable.py +0 -0
{legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/pyproject.toml
RENAMED
@@ -137,6 +137,7 @@ ignore = [
   "PLR2004",  # Magic value used in comparison
   "ISC001",   # Conflicts with formatter
   "PT011",
+  "RUF013",   # complains if you default to None for an asinine reason
 ]
 isort.required-imports = ["from __future__ import annotations"]
 # Uncomment if using a _compat.typing backport
@@ -145,6 +146,7 @@ isort.required-imports = ["from __future__ import annotations"]
 [tool.ruff.lint.per-file-ignores]
 "tests/**" = ["T20"]
 "noxfile.py" = ["T20"]
+"docs/source/notebooks/*" = ["T201", "E402"]
 
 [tool.pylint]
 py-version = "3.8"
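For context: "RUF013" is Ruff's implicit-Optional rule. A minimal illustration of what it flags (not code from this package):

    def parse(data: str = None):  # RUF013: `None` default makes this implicitly `Optional[str]`
        ...

    def parse_explicit(data: str | None = None):  # explicit annotation, passes the check
        ...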
{legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/compression/radware.py
RENAMED
@@ -441,15 +441,11 @@ def _radware_sigcompress_encode(
         while (i < sig_in.size) and (i < j + 48):
             si_i = int16(sig_in[i] + shift)
             si_im1 = int16(sig_in[i - 1] + shift)
-            if max1 < si_i:
-                max1 = si_i
-            if min1 > si_i:
-                min1 = si_i
+            max1 = max(max1, si_i)
+            min1 = min(min1, si_i)
             ds = si_i - si_im1
-            if max2 < ds:
-                max2 = ds
-            if min2 > ds:
-                min2 = ds
+            max2 = max(max2, ds)
+            min2 = min(min2, ds)
             nw += 1
             i += 1
         if max1 - min1 <= max2 - min2:  # use absolute values
@@ -460,15 +456,13 @@ def _radware_sigcompress_encode(
                 i < j + 128
             ):  # FIXME: 128 could be tuned better?
                 si_i = int16(sig_in[i] + shift)
-                if max1 < si_i:
-                    max1 = si_i
+                max1 = max(max1, si_i)
                 dd1 = max1 - min1
                 if min1 > si_i:
                     dd1 = max1 - si_i
                 if dd1 > mask[nb1]:
                     break
-                if min1 > si_i:
-                    min1 = si_i
+                min1 = min(min1, si_i)
                 nw += 1
                 i += 1
             else:  # use difference values
@@ -481,15 +475,13 @@ def _radware_sigcompress_encode(
                 si_i = int16(sig_in[i] + shift)
                 si_im1 = int16(sig_in[i - 1] + shift)
                 ds = si_i - si_im1
-                if max2 < ds:
-                    max2 = ds
+                max2 = max(max2, ds)
                 dd2 = max2 - min2
                 if min2 > ds:
                     dd2 = max2 - ds
                 if dd2 > mask[nb2]:
                     break
-                if min2 > ds:
-                    min2 = ds
+                min2 = min(min2, ds)
                 nw += 1
                 i += 1
 
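The radware.py hunks above replace hand-rolled compare-and-assign pairs with Python's built-in `min`/`max`; the running-extrema update is behavior-preserving. A small sketch of the equivalence, with made-up values:

    max1, min1, si_i = 10, 3, 7

    # before: explicit comparisons
    if max1 < si_i:
        max1 = si_i
    if min1 > si_i:
        min1 = si_i

    # after: the same update, one line each
    max1 = max(max1, si_i)
    min1 = min(min1, si_i)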
{legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5/_serializers/read/composite.py
RENAMED
@@ -103,8 +103,7 @@ def _h5_read_lgdo(
     if idx is not None:
         # check if idx is just an ordered list of the integers if so can ignore
         if (idx == np.arange(0, len(idx), 1)).all():
-            if n_rows > len(idx):
-                n_rows = len(idx)
+            n_rows = min(n_rows, len(idx))
             idx = None
         else:
             # chop off indices < start_row
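The change above clamps n_rows with min() on the identity-index fast path, so a caller-supplied n_rows smaller than len(idx) is respected. A sketch of that branch with hypothetical values:

    import numpy as np

    idx, n_rows = np.array([0, 1, 2, 3]), 10
    if (idx == np.arange(0, len(idx), 1)).all():
        n_rows = min(n_rows, len(idx))  # -> 4
        idx = None  # contiguous read; fancy indexing not needed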
{legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5/_serializers/read/ndarray.py
RENAMED
@@ -43,15 +43,14 @@ def _h5_read_ndarray(
     if idx is not None:
         if len(idx) > 0 and idx[-1] >= ds_n_rows:
             log.warning("idx indexed past the end of the array in the file. Culling...")
-            n_rows_to_read = bisect_left(idx
-            idx =
+            n_rows_to_read = bisect_left(idx, ds_n_rows)
+            idx = idx[:n_rows_to_read]
             if len(idx) == 0:
                 log.warning("idx empty after culling.")
         n_rows_to_read = len(idx)
     else:
         n_rows_to_read = ds_n_rows - start_row
-    if n_rows < n_rows_to_read:
-        n_rows_to_read = n_rows
+    n_rows_to_read = min(n_rows_to_read, n_rows)
 
     if idx is None:
         fspace.select_hyperslab(
@@ -112,6 +111,6 @@ def _h5_read_ndarray(
     # special handling for bools
     # (c and Julia store as uint8 so cast to bool)
     if datatype.get_nested_datatype_string(attrs["datatype"]) == "bool":
-        nda = nda.astype(np.bool_)
+        nda = nda.astype(np.bool_, copy=False)
 
     return (nda, attrs, n_rows_to_read)
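The last hunk relies on documented numpy behavior: astype(..., copy=False) returns the input array itself when it already has the requested dtype, instead of always allocating a copy. For example:

    import numpy as np

    a = np.ones(4, dtype=np.bool_)
    b = a.astype(np.bool_)              # always a new array
    c = a.astype(np.bool_, copy=False)  # dtype already matches: `a` is returned
    assert b is not a and c is a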
{legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5/_serializers/read/scalar.py
RENAMED
@@ -7,7 +7,7 @@ import numpy as np
 
 from ....types import Scalar
 from ...exceptions import LH5DecodeError
-from .utils import read_attrs
+from . import utils
 
 log = logging.getLogger(__name__)
 
@@ -22,7 +22,7 @@ def _h5_read_scalar(
     sp = h5py.h5s.create(h5py.h5s.SCALAR)
     h5d.read(sp, sp, value)
     value = value[()]
-    attrs = read_attrs(h5d, fname, oname)
+    attrs = utils.read_attrs(h5d, fname, oname)
 
     # special handling for bools
     # (c and Julia store as uint8 so cast to bool)
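Binding the sibling module (`from . import utils`) instead of a name from it is the usual way to break an import cycle — the new utils.py below itself does `from . import scalar`. A minimal sketch of the pattern, with hypothetical modules pkg/a.py and pkg/b.py:

    # pkg/a.py
    from . import b  # binds the module object; safe even if b imports a

    def f():
        return b.g() + 1  # attribute resolved at call time, after b is fully imported

    # pkg/b.py
    from . import a  # circular, but harmless with module-level binding

    def g():
        return 41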
legend_pydataobj-1.10.2/src/lgdo/lh5/_serializers/read/utils.py
ADDED
@@ -0,0 +1,174 @@
+from __future__ import annotations
+
+import logging
+
+import h5py
+import numpy as np
+
+from .... import types
+from ... import datatype
+from ...exceptions import LH5DecodeError
+from . import scalar
+
+log = logging.getLogger(__name__)
+
+
+def check_obj_buf_attrs(attrs, new_attrs, fname, oname):
+    if set(attrs.keys()) != set(new_attrs.keys()):
+        msg = (
+            f"existing buffer and new data chunk have different attributes: "
+            f"obj_buf.attrs={attrs} != {fname}[{oname}].attrs={new_attrs}"
+        )
+        raise LH5DecodeError(msg, fname, oname)
+
+
+def read_attrs(h5o, fname, oname):
+    """Read all attributes for an hdf5 dataset or group using low level API
+    and return them as a dict. Assume all are strings or scalar types."""
+    attrs = {}
+    for i_attr in range(h5py.h5a.get_num_attrs(h5o)):
+        h5a = h5py.h5a.open(h5o, index=i_attr)
+        name = h5a.get_name().decode()
+        if h5a.shape != ():
+            msg = f"attribute {oname} is not a string or scalar"
+            raise LH5DecodeError(msg, fname, oname)
+        val = np.empty((), h5a.dtype)
+        h5a.read(val)
+        if h5a.get_type().get_class() == h5py.h5t.STRING:
+            attrs[name] = val.item().decode()
+        else:
+            attrs[name] = val.item()
+        h5a.close()
+    return attrs
+
+
+def read_n_rows(h5o, fname, oname):
+    """Read number of rows in LH5 object"""
+    if not h5py.h5a.exists(h5o, b"datatype"):
+        msg = "missing 'datatype' attribute"
+        raise LH5DecodeError(msg, fname, oname)
+
+    h5a = h5py.h5a.open(h5o, b"datatype")
+    type_attr = np.empty((), h5a.dtype)
+    h5a.read(type_attr)
+    type_attr = type_attr.item().decode()
+    lgdotype = datatype.datatype(type_attr)
+
+    # scalars are dim-0 datasets
+    if lgdotype is types.Scalar:
+        return None
+
+    # structs don't have rows
+    if lgdotype is types.Struct:
+        return None
+
+    # tables should have elements with all the same length
+    if lgdotype is types.Table:
+        # read out each of the fields
+        rows_read = None
+        for field in datatype.get_struct_fields(type_attr):
+            obj = h5py.h5o.open(h5o, field.encode())
+            n_rows_read = read_n_rows(obj, fname, field)
+            obj.close()
+            if not rows_read:
+                rows_read = n_rows_read
+            elif rows_read != n_rows_read:
+                log.warning(
+                    f"'{field}' field in table '{oname}' has {rows_read} rows, "
+                    f"{n_rows_read} was expected"
+                )
+
+        return rows_read
+
+    # length of vector of vectors is the length of its cumulative_length
+    if lgdotype is types.VectorOfVectors:
+        obj = h5py.h5o.open(h5o, b"cumulative_length")
+        n_rows = read_n_rows(obj, fname, "cumulative_length")
+        obj.close()
+        return n_rows
+
+    # length of vector of encoded vectors is the length of its decoded_size
+    if lgdotype in (types.VectorOfEncodedVectors, types.ArrayOfEncodedEqualSizedArrays):
+        obj = h5py.h5o.open(h5o, b"encoded_data")
+        n_rows = read_n_rows(obj, fname, "encoded_data")
+        obj.close()
+        return n_rows
+
+    # return array length (without reading the array!)
+    if issubclass(lgdotype, types.Array):
+        # compute the number of rows to read
+        return h5o.get_space().shape[0]
+
+    msg = f"don't know how to read rows of LGDO {lgdotype.__name__}"
+    raise LH5DecodeError(msg, fname, oname)
+
+
+def read_size_in_bytes(h5o, fname, oname, field_mask=None):
+    """Read number size in LH5 object in memory (in B)"""
+    if not h5py.h5a.exists(h5o, b"datatype"):
+        msg = "missing 'datatype' attribute"
+        raise LH5DecodeError(msg, fname, oname)
+
+    h5a = h5py.h5a.open(h5o, b"datatype")
+    type_attr = np.empty((), h5a.dtype)
+    h5a.read(type_attr)
+    type_attr = type_attr.item().decode()
+    lgdotype = datatype.datatype(type_attr)
+
+    # scalars are dim-0 datasets
+    if lgdotype in (
+        types.Scalar,
+        types.Array,
+        types.ArrayOfEqualSizedArrays,
+        types.FixedSizeArray,
+    ):
+        return int(np.prod(h5o.shape) * h5o.dtype.itemsize)
+
+    # structs don't have rows
+    if lgdotype in (types.Struct, types.Histogram, types.Histogram.Axis):
+        size = 0
+        for key in h5o:
+            obj = h5py.h5o.open(h5o, key)
+            size += read_size_in_bytes(obj, fname, oname, field_mask)
+            obj.close()
+        return size
+
+    # tables should have elements with all the same length
+    if lgdotype in (types.Table, types.WaveformTable):
+        # read out each of the fields
+        size = 0
+        if not field_mask:
+            field_mask = datatype.get_struct_fields(type_attr)
+        for field in field_mask:
+            obj = h5py.h5o.open(h5o, field.encode())
+            size += read_size_in_bytes(obj, fname, field)
+            obj.close()
+        return size
+
+    # length of vector of vectors is the length of its cumulative_length
+    if lgdotype is types.VectorOfVectors:
+        size = 0
+        obj = h5py.h5o.open(h5o, b"cumulative_length")
+        size += read_size_in_bytes(obj, fname, "cumulative_length")
+        obj.close()
+        obj = h5py.h5o.open(h5o, b"flattened_data")
+        size += read_size_in_bytes(obj, fname, "flattened_data")
+        obj.close()
+        return size
+
+    # length of vector of encoded vectors is the length of its decoded_size
+    if lgdotype is types.ArrayOfEncodedEqualSizedArrays:
+        obj = h5py.h5o.open(h5o, b"decoded_size")
+        size = scalar._h5_read_scalar(obj, fname, "decoded_size")[0].value
+        obj.close()
+
+        obj = h5py.h5o.open(h5o, b"encoded_data")
+        cl = h5py.h5o.open(obj, b"cumulative_length")
+        size *= cl.shape[0]
+        size *= 4  # TODO: UPDATE WHEN CODECS SUPPORT MORE DTYPES
+        obj.close()
+
+        return size
+
+    msg = f"don't know how to read size of LGDO {lgdotype.__name__}"
+    raise LH5DecodeError(msg, fname, oname)
{legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5/_serializers/read/vector_of_vectors.py
RENAMED
@@ -156,7 +156,7 @@ def _h5_read_vector_of_vectors(
     # grow fd_buf if necessary to hold the data
     fdb_size = fd_buf_start + fd_n_rows
     if len(fd_buf) < fdb_size:
-        fd_buf.resize(fdb_size)
+        fd_buf.nda.resize(fdb_size, refcheck=False)
 
     # now read
     h5o = h5py.h5o.open(h5g, b"flattened_data")
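This fix resizes the buffer's underlying numpy array directly: ndarray.resize() raises a ValueError if other objects still hold references to the array, unless refcheck=False is passed. For example:

    import numpy as np

    buf = np.zeros(4)
    holder = [buf]  # a second reference to the same array

    # buf.resize(8) would raise ValueError here because of `holder`
    buf.resize(8, refcheck=False)  # reallocates in place regardless
    print(buf.shape)  # (8,)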
{legend_pydataobj-1.10.0 → legend_pydataobj-1.10.2}/src/lgdo/lh5/core.py
RENAMED
@@ -120,6 +120,7 @@ def read(
         lh5_obj = lh5_file[name]
     else:
         lh5_files = list(lh5_file)
+
         n_rows_read = 0
         obj_buf_is_new = False
 
@@ -175,6 +176,9 @@ def read(
 
     if isinstance(idx, (list, tuple)) and len(idx) > 0 and not np.isscalar(idx[0]):
         idx = idx[0]
+    if isinstance(idx, np.ndarray) and idx.dtype == np.dtype("?"):
+        idx = np.where(idx)[0]
+
     obj, n_rows_read = _serializers._h5_read_lgdo(
         lh5_obj.id,
         lh5_obj.file.filename,
@@ -350,5 +354,8 @@ def read_as(
     # NOTE: providing a buffer does not make much sense
     obj = read(name, lh5_file, **kwargs1)
 
+    if isinstance(obj, tuple):
+        obj = obj[0]
+
     # and finally return a view
     return obj.view_as(library, **kwargs2)
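With the second hunk, read() also accepts a boolean mask as idx and converts it to integer indices via np.where; the read_as() hunk unwraps the (obj, n_rows_read) tuple that read() can return. A hedged sketch of the mask usage, with hypothetical file and dataset names:

    import numpy as np
    from lgdo import lh5

    energy = lh5.read("geds/raw/energy", "data.lh5").view_as("np")
    mask = energy > 100.0  # boolean array, dtype "?"

    # now equivalent to passing idx=np.where(mask)[0]
    selected = lh5.read("geds/raw/energy", "data.lh5", idx=mask)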