PyPI - legend-pydataobj - Versions diffs - 1.13.0__tar.gz → 1.14.1__tar.gz - Mend

legend-pydataobj 1.13.0__tar.gz → 1.14.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (98) hide show

{legend_pydataobj-1.13.0 → legend_pydataobj-1.14.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: legend_pydataobj
-Version: 1.13.0
+Version: 1.14.1
 Summary: LEGEND Python Data Objects
 Author: The LEGEND Collaboration
 Maintainer: The LEGEND Collaboration

{legend_pydataobj-1.13.0 → legend_pydataobj-1.14.1}/src/legend_pydataobj.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: legend_pydataobj
-Version: 1.13.0
+Version: 1.14.1
 Summary: LEGEND Python Data Objects
 Author: The LEGEND Collaboration
 Maintainer: The LEGEND Collaboration

{legend_pydataobj-1.13.0 → legend_pydataobj-1.14.1}/src/legend_pydataobj.egg-info/SOURCES.txt RENAMED Viewed

@@ -78,6 +78,7 @@ tests/lh5/test_lh5_store.py
 tests/lh5/test_lh5_tools.py
 tests/lh5/test_lh5_utils.py
 tests/lh5/test_lh5_write.py
+tests/lh5/test_pathlib.py
 tests/types/test_array.py
 tests/types/test_arrayofequalsizedarrays.py
 tests/types/test_encoded.py

{legend_pydataobj-1.13.0 → legend_pydataobj-1.14.1}/src/lgdo/_version.py RENAMED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '1.13.0'
-__version_tuple__ = version_tuple = (1, 13, 0)
+__version__ = version = '1.14.1'
+__version_tuple__ = version_tuple = (1, 14, 1)

{legend_pydataobj-1.13.0 → legend_pydataobj-1.14.1}/src/lgdo/compression/radware.py RENAMED Viewed

@@ -95,13 +95,13 @@ def encode(
     if isinstance(sig_in, np.ndarray):
         s = sig_in.shape
         if len(sig_in) == 0:
-            return np.empty(s[:-1] + (0,), dtype=ubyte), np.empty(0, dtype=uint32)
+            return np.empty((*s[:-1], 0), dtype=ubyte), np.empty(0, dtype=uint32)
         if sig_out is None:
             # the encoded signal is an array of bytes
             # -> twice as long as a uint16
             # pre-allocate ubyte (uint8) array, expand last dimension
-            sig_out = np.empty(s[:-1] + (s[-1] * 2,), dtype=ubyte)
+            sig_out = np.empty((*s[:-1], s[-1] * 2), dtype=ubyte)
         if sig_out.dtype != ubyte:
             msg = "sig_out must be of type ubyte"
@@ -226,7 +226,7 @@ def decode(
             # allocate output array with lasd dim as large as the longest
             # uncompressed wf
             maxs = np.max(_get_hton_u16(sig_in[0], 0))
-            sig_out = np.empty(s[:-1] + (maxs,), dtype=int32)
+            sig_out = np.empty((*s[:-1], maxs), dtype=int32)
         # siglen has one dimension less (the last)
         siglen = np.empty(s[:-1], dtype=uint32)

{legend_pydataobj-1.13.0 → legend_pydataobj-1.14.1}/src/lgdo/compression/varlen.py RENAMED Viewed

@@ -74,14 +74,14 @@ def encode(
     if isinstance(sig_in, np.ndarray):
         s = sig_in.shape
         if len(sig_in) == 0:
-            return np.empty(s[:-1] + (0,), dtype=ubyte), np.empty(0, dtype=uint32)
+            return np.empty((*s[:-1], 0), dtype=ubyte), np.empty(0, dtype=uint32)
         if sig_out is None:
             # the encoded signal is an array of bytes
             # pre-allocate ubyte (uint8) array with a generous (but safe) size
             max_b = int(np.ceil(np.iinfo(sig_in.dtype).bits / 16) * 5)
             # expand last dimension
-            sig_out = np.empty(s[:-1] + (s[-1] * max_b,), dtype=ubyte)
+            sig_out = np.empty((*s[:-1], s[-1] * max_b), dtype=ubyte)
         if sig_out.dtype != ubyte:
             msg = "sig_out must be of type ubyte"

{legend_pydataobj-1.13.0 → legend_pydataobj-1.14.1}/src/lgdo/lh5/_serializers/read/ndarray.py RENAMED Viewed

@@ -57,7 +57,7 @@ def _h5_read_ndarray(
             (start_row,) + (0,) * (h5d.rank - 1),
             (1,) * h5d.rank,
             None,
-            (n_rows_to_read,) + fspace.shape[1:],
+            (n_rows_to_read, *fspace.shape[1:]),
         )
     elif use_h5idx:
         # Note that h5s will automatically merge adjacent elements into a range
@@ -67,7 +67,7 @@ def _h5_read_ndarray(
                 (i,) + (0,) * (h5d.rank - 1),
                 (1,) * h5d.rank,
                 None,
-                (1,) + fspace.shape[1:],
+                (1, *fspace.shape[1:]),
                 h5py.h5s.SELECT_OR,
             )
@@ -84,7 +84,7 @@ def _h5_read_ndarray(
                 (obj_buf_start,) + (0,) * (h5d.rank - 1),
                 (1,) * h5d.rank,
                 None,
-                (n_rows_to_read,) + fspace.shape[1:],
+                (n_rows_to_read, *fspace.shape[1:]),
             )
             h5d.read(mspace, fspace, obj_buf.nda)
         else:
@@ -93,10 +93,10 @@ def _h5_read_ndarray(
             obj_buf.nda[dest_sel, ...] = tmp[idx, ...]
         nda = obj_buf.nda
     elif n_rows == 0:
-        tmp_shape = (0,) + h5d.shape[1:]
+        tmp_shape = (0, *h5d.shape[1:])
         nda = np.empty(tmp_shape, h5d.dtype)
     else:
-        mspace = h5py.h5s.create_simple((n_rows_to_read,) + fspace.shape[1:])
+        mspace = h5py.h5s.create_simple((n_rows_to_read, *fspace.shape[1:]))
         nda = np.empty(mspace.shape, h5d.dtype)
         if idx is None or use_h5idx:
             h5d.read(mspace, fspace, nda)

{legend_pydataobj-1.13.0 → legend_pydataobj-1.14.1}/src/lgdo/lh5/_serializers/write/array.py RENAMED Viewed

@@ -40,7 +40,7 @@ def _h5_write_array(
         # this is needed in order to have a resizable (in the first
         # axis) data set, i.e. rows can be appended later
         # NOTE: this automatically turns chunking on!
-        maxshape = (None,) + nda.shape[1:]
+        maxshape = (None, *nda.shape[1:])
         h5py_kwargs.setdefault("maxshape", maxshape)
         if wo_mode == "o" and name in group:

{legend_pydataobj-1.13.0 → legend_pydataobj-1.14.1}/src/lgdo/lh5/_serializers/write/composite.py RENAMED Viewed

@@ -52,140 +52,166 @@ def _h5_write_lgdo(
     # In hdf5, 'a' is really "modify" -- in addition to appending, you can
     # change any object in the file. So we use file:append for
     # write_object:overwrite.
+    opened_here = False
     if not isinstance(lh5_file, h5py.File):
         mode = "w" if wo_mode == "of" or not Path(lh5_file).exists() else "a"
-        lh5_file = h5py.File(lh5_file, mode=mode, **file_kwargs)
-    log.debug(
-        f"writing {obj!r}[{start_row}:{n_rows}] as "
-        f"{lh5_file.filename}:{group}/{name}[{write_start}:], "
-        f"mode = {wo_mode}, h5py_kwargs = {h5py_kwargs}"
-    )
-    group = utils.get_h5_group(group, lh5_file)
+        try:
+            fh = h5py.File(lh5_file, mode=mode, **file_kwargs)
+        except OSError as oe:
+            raise LH5EncodeError(str(oe), lh5_file, None) from oe
-    # name already in file
-    if name in group or (
-        ("datatype" in group.attrs or group == "/")
-        and (len(name) <= 2 or "/" not in name[1:-1])
-    ):
-        pass
-    # group is in file but not struct or need to create nesting
+        opened_here = True
     else:
-        # check if name is nested
-        # if name is nested, iterate up from parent
-        # otherwise we just need to iterate the group
-        if len(name) > 2 and "/" in name[1:-1]:
-            group = utils.get_h5_group(
-                name[:-1].rsplit("/", 1)[0],
-                group,
-            )
-            curr_name = (
-                name.rsplit("/", 1)[1]
-                if name[-1] != "/"
-                else name[:-1].rsplit("/", 1)[1]
-            )
-        else:
-            curr_name = name
-        # initialize the object to be written
-        obj = types.Struct({curr_name.replace("/", ""): obj})
+        fh = lh5_file
-        # if base group already has a child we just append
-        if len(group) >= 1:
-            wo_mode = "ac"
-        else:
-            # iterate up the group hierarchy until we reach the root or a group with more than one child
-            while group.name != "/":
-                if len(group) > 1:
-                    break
-                curr_name = group.name
-                group = group.parent
-                if group.name != "/":
-                    obj = types.Struct({curr_name[len(group.name) + 1 :]: obj})
-                else:
-                    obj = types.Struct({curr_name[1:]: obj})
-            # if the group has more than one child, we need to append else we can overwrite
-            wo_mode = "ac" if len(group) > 1 else "o"
-        # set the new name
-        if group.name == "/":
-            name = "/"
-        elif group.parent.name == "/":
-            name = group.name[1:]
-        else:
-            name = group.name[len(group.parent.name) + 1 :]
-        # get the new group
-        group = utils.get_h5_group(group.parent if group.name != "/" else "/", lh5_file)
+    try:
+        log.debug(
+            f"writing {obj!r}[{start_row}:{n_rows}] as "
+            f"{fh.filename}:{group}/{name}[{write_start}:], "
+            f"mode = {wo_mode}, h5py_kwargs = {h5py_kwargs}"
+        )
-    if wo_mode == "w" and name in group:
-        msg = f"can't overwrite '{name}' in wo_mode 'write_safe'"
-        raise LH5EncodeError(msg, lh5_file, group, name)
+        group = utils.get_h5_group(group, fh)
-    # struct, table, waveform table or histogram.
-    if isinstance(obj, types.Struct):
-        if (
-            isinstance(obj, types.Histogram)
-            and wo_mode not in ["w", "o", "of"]
-            and name in group
+        # name already in file
+        if name in group or (
+            ("datatype" in group.attrs or group == "/")
+            and (len(name) <= 2 or "/" not in name[1:-1])
         ):
-            msg = f"can't append-write to histogram in wo_mode '{wo_mode}'"
-            raise LH5EncodeError(msg, lh5_file, group, name)
-        if isinstance(obj, types.Histogram) and write_start != 0:
-            msg = f"can't write histogram in wo_mode '{wo_mode}' with write_start != 0"
-            raise LH5EncodeError(msg, lh5_file, group, name)
-        return _h5_write_struct(
-            obj,
-            name,
-            lh5_file,
-            group=group,
-            start_row=start_row,
-            n_rows=n_rows,  # if isinstance(obj, types.Table | types.Histogram) else None,
-            wo_mode=wo_mode,
-            write_start=write_start,
-            **h5py_kwargs,
-        )
-    # scalars
-    if isinstance(obj, types.Scalar):
-        return _h5_write_scalar(obj, name, lh5_file, group, wo_mode)
+            pass
+        # group is in file but not struct or need to create nesting
+        else:
+            # check if name is nested
+            # if name is nested, iterate up from parent
+            # otherwise we just need to iterate the group
+            if len(name) > 2 and "/" in name[1:-1]:
+                group = utils.get_h5_group(
+                    name[:-1].rsplit("/", 1)[0],
+                    group,
+                )
+                curr_name = (
+                    name.rsplit("/", 1)[1]
+                    if name[-1] != "/"
+                    else name[:-1].rsplit("/", 1)[1]
+                )
+            else:
+                curr_name = name
+            # initialize the object to be written
+            obj = types.Struct({curr_name.replace("/", ""): obj})
+            # if base group already has a child we just append
+            if len(group) >= 1:
+                wo_mode = "ac"
+            else:
+                # iterate up the group hierarchy until we reach the root or a group with more than one child
+                while group.name != "/":
+                    if len(group) > 1:
+                        break
+                    curr_name = group.name
+                    group = group.parent
+                    if group.name != "/":
+                        obj = types.Struct({curr_name[len(group.name) + 1 :]: obj})
+                    else:
+                        obj = types.Struct({curr_name[1:]: obj})
+                # if the group has more than one child, we need to append else we can overwrite
+                wo_mode = "ac" if len(group) > 1 else "o"
+            # set the new name
+            if group.name == "/":
+                name = "/"
+            elif group.parent.name == "/":
+                name = group.name[1:]
+            else:
+                name = group.name[len(group.parent.name) + 1 :]
+            # get the new group
+            group = utils.get_h5_group(group.parent if group.name != "/" else "/", fh)
+        if wo_mode == "w" and name in group:
+            msg = f"can't overwrite '{name}' in wo_mode 'write_safe'"
+            raise LH5EncodeError(msg, fh, group, name)
+        # struct, table, waveform table or histogram.
+        if isinstance(obj, types.Struct):
+            if (
+                isinstance(obj, types.Histogram)
+                and wo_mode not in ["w", "o", "of"]
+                and name in group
+            ):
+                msg = f"can't append-write to histogram in wo_mode '{wo_mode}'"
+                raise LH5EncodeError(msg, fh, group, name)
+            if isinstance(obj, types.Histogram) and write_start != 0:
+                msg = f"can't write histogram in wo_mode '{wo_mode}' with write_start != 0"
+                raise LH5EncodeError(msg, fh, group, name)
+            return _h5_write_struct(
+                obj,
+                name,
+                fh,
+                group=group,
+                start_row=start_row,
+                n_rows=n_rows,  # if isinstance(obj, types.Table | types.Histogram) else None,
+                wo_mode=wo_mode,
+                write_start=write_start,
+                **h5py_kwargs,
+            )
-    # vector of encoded vectors
-    if isinstance(
-        obj, (types.VectorOfEncodedVectors, types.ArrayOfEncodedEqualSizedArrays)
-    ):
-        group = utils.get_h5_group(
-            name, group, grp_attrs=obj.attrs, overwrite=(wo_mode == "o")
-        )
+        # scalars
+        if isinstance(obj, types.Scalar):
+            return _h5_write_scalar(obj, name, fh, group, wo_mode)
-        # ask not to further compress flattened_data, it is already compressed!
-        obj.encoded_data.flattened_data.attrs["compression"] = None
+        # vector of encoded vectors
+        if isinstance(
+            obj, (types.VectorOfEncodedVectors, types.ArrayOfEncodedEqualSizedArrays)
+        ):
+            group = utils.get_h5_group(
+                name, group, grp_attrs=obj.attrs, overwrite=(wo_mode == "o")
+            )
-        _h5_write_vector_of_vectors(
-            obj.encoded_data,
-            "encoded_data",
-            lh5_file,
-            group=group,
-            start_row=start_row,
-            n_rows=n_rows,
-            wo_mode=wo_mode,
-            write_start=write_start,
-            **h5py_kwargs,
-        )
+            # ask not to further compress flattened_data, it is already compressed!
+            obj.encoded_data.flattened_data.attrs["compression"] = None
-        if isinstance(obj.decoded_size, types.Scalar):
-            _h5_write_scalar(
-                obj.decoded_size,
-                "decoded_size",
-                lh5_file,
+            _h5_write_vector_of_vectors(
+                obj.encoded_data,
+                "encoded_data",
+                fh,
                 group=group,
+                start_row=start_row,
+                n_rows=n_rows,
                 wo_mode=wo_mode,
+                write_start=write_start,
+                **h5py_kwargs,
             )
-        else:
-            _h5_write_array(
-                obj.decoded_size,
-                "decoded_size",
-                lh5_file,
+            if isinstance(obj.decoded_size, types.Scalar):
+                _h5_write_scalar(
+                    obj.decoded_size,
+                    "decoded_size",
+                    fh,
+                    group=group,
+                    wo_mode=wo_mode,
+                )
+            else:
+                _h5_write_array(
+                    obj.decoded_size,
+                    "decoded_size",
+                    fh,
+                    group=group,
+                    start_row=start_row,
+                    n_rows=n_rows,
+                    wo_mode=wo_mode,
+                    write_start=write_start,
+                    **h5py_kwargs,
+                )
+            return None
+        # vector of vectors
+        if isinstance(obj, types.VectorOfVectors):
+            return _h5_write_vector_of_vectors(
+                obj,
+                name,
+                fh,
                 group=group,
                 start_row=start_row,
                 n_rows=n_rows,
@@ -194,38 +220,25 @@ def _h5_write_lgdo(
                 **h5py_kwargs,
             )
-        return None
-    # vector of vectors
-    if isinstance(obj, types.VectorOfVectors):
-        return _h5_write_vector_of_vectors(
-            obj,
-            name,
-            lh5_file,
-            group=group,
-            start_row=start_row,
-            n_rows=n_rows,
-            wo_mode=wo_mode,
-            write_start=write_start,
-            **h5py_kwargs,
-        )
-    # if we get this far, must be one of the Array types
-    if isinstance(obj, types.Array):
-        return _h5_write_array(
-            obj,
-            name,
-            lh5_file,
-            group=group,
-            start_row=start_row,
-            n_rows=n_rows,
-            wo_mode=wo_mode,
-            write_start=write_start,
-            **h5py_kwargs,
-        )
+        # if we get this far, must be one of the Array types
+        if isinstance(obj, types.Array):
+            return _h5_write_array(
+                obj,
+                name,
+                fh,
+                group=group,
+                start_row=start_row,
+                n_rows=n_rows,
+                wo_mode=wo_mode,
+                write_start=write_start,
+                **h5py_kwargs,
+            )
-    msg = f"do not know how to write '{name}' of type '{type(obj).__name__}'"
-    raise LH5EncodeError(msg, lh5_file, group, name)
+        msg = f"do not know how to write '{name}' of type '{type(obj).__name__}'"
+        raise LH5EncodeError(msg, fh, group, name)
+    finally:
+        if opened_here:
+            fh.close()
 def _h5_write_struct(

{legend_pydataobj-1.13.0 → legend_pydataobj-1.14.1}/src/lgdo/lh5/core.py RENAMED Viewed

@@ -14,12 +14,13 @@ from numpy.typing import ArrayLike
 from .. import types
 from . import _serializers
+from .exceptions import LH5DecodeError
 from .utils import read_n_rows
 def read(
     name: str,
-    lh5_file: str | h5py.File | Sequence[str | h5py.File],
+    lh5_file: str | Path | h5py.File | Sequence[str | Path | h5py.File],
     start_row: int = 0,
     n_rows: int = sys.maxsize,
     idx: ArrayLike = None,
@@ -110,15 +111,20 @@ def read(
     object
         the read-out object
     """
+    close_after = False
     if isinstance(lh5_file, h5py.File):
         lh5_obj = lh5_file[name]
-    elif isinstance(lh5_file, str):
-        lh5_file = h5py.File(lh5_file, mode="r", locking=locking)
+    elif isinstance(lh5_file, (str, Path)):
+        try:
+            lh5_file = h5py.File(str(Path(lh5_file)), mode="r", locking=locking)
+        except (OSError, FileExistsError) as oe:
+            raise LH5DecodeError(str(oe), lh5_file, None) from oe
+        close_after = True
         try:
             lh5_obj = lh5_file[name]
         except KeyError as ke:
-            err = f"Object {name} not found in file {lh5_file.filename}"
-            raise KeyError(err) from ke
+            raise LH5DecodeError(str(ke), lh5_file, name) from ke
     else:
         if obj_buf is not None:
             obj_buf.resize(obj_buf_start)
@@ -173,29 +179,32 @@ def read(
     if isinstance(idx, np.ndarray) and idx.dtype == np.dtype("?"):
         idx = np.where(idx)[0]
-    obj, n_rows_read = _serializers._h5_read_lgdo(
-        lh5_obj.id,
-        lh5_obj.file.filename,
-        lh5_obj.name,
-        start_row=start_row,
-        n_rows=n_rows,
-        idx=idx,
-        use_h5idx=use_h5idx,
-        field_mask=field_mask,
-        obj_buf=obj_buf,
-        obj_buf_start=obj_buf_start,
-        decompress=decompress,
-    )
-    with suppress(AttributeError):
-        obj.resize(obj_buf_start + n_rows_read)
-    return obj
+    try:
+        obj, n_rows_read = _serializers._h5_read_lgdo(
+            lh5_obj.id,
+            lh5_obj.file.filename,
+            lh5_obj.name,
+            start_row=start_row,
+            n_rows=n_rows,
+            idx=idx,
+            use_h5idx=use_h5idx,
+            field_mask=field_mask,
+            obj_buf=obj_buf,
+            obj_buf_start=obj_buf_start,
+            decompress=decompress,
+        )
+        with suppress(AttributeError):
+            obj.resize(obj_buf_start + n_rows_read)
+        return obj
+    finally:
+        if close_after:
+            lh5_file.close()
 def write(
     obj: types.LGDO,
     name: str,
-    lh5_file: str | h5py.File,
+    lh5_file: str | Path | h5py.File,
     group: str | h5py.Group = "/",
     start_row: int = 0,
     n_rows: int | None = None,
@@ -318,7 +327,7 @@ def write(
 def read_as(
     name: str,
-    lh5_file: str | h5py.File | Sequence[str | h5py.File],
+    lh5_file: str | Path | h5py.File | Sequence[str | Path | h5py.File],
     library: str,
     **kwargs,
 ) -> Any:

legend_pydataobj-1.14.1/src/lgdo/lh5/exceptions.py ADDED Viewed

@@ -0,0 +1,55 @@
+from __future__ import annotations
+import h5py
+class LH5DecodeError(Exception):
+    def __init__(
+        self, message: str, file: str | h5py.File, oname: str | None = None
+    ) -> None:
+        super().__init__(message)
+        self.file = file.filename if isinstance(file, h5py.File) else file
+        self.obj = oname
+    def __str__(self) -> str:
+        if self.obj is None:
+            msg = f"while opening file {self.file} for decoding: "
+        else:
+            msg = f"while decoding object '{self.obj}' in file {self.file}: "
+        return msg + super().__str__()
+    def __reduce__(self) -> tuple:  # for pickling.
+        return self.__class__, (*self.args, self.file, self.obj)
+class LH5EncodeError(Exception):
+    def __init__(
+        self,
+        message: str,
+        file: str | h5py.File,
+        group: str | h5py.Group | None = None,
+        name: str | None = None,
+    ) -> None:
+        super().__init__(message)
+        self.file = file.filename if isinstance(file, h5py.File) else file
+        self.group = (
+            (group.name if isinstance(file, h5py.File) else group).rstrip("/")
+            if group is not None
+            else None
+        )
+        self.name = name.lstrip("/") if name is not None else None
+    def __str__(self) -> str:
+        if self.name is None:
+            msg = f"while opening file {self.file} for encoding: "
+        else:
+            msg = (
+                f"while encoding object {self.group}/{self.name} to file {self.file}: "
+            )
+        return msg + super().__str__()
+    def __reduce__(self) -> tuple:  # for pickling.
+        return self.__class__, (*self.args, self.file, self.group, self.name)

legend-pydataobj 1.13.0__tar.gz → 1.14.1__tar.gz

legend-pydataobj 1.13.0__tar.gz → 1.14.1__tar.gz