PyPI - legend-pydataobj - Versions diffs - 1.0.0__py3-none-any.whl - Mend

legend-pydataobj 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

legend_pydataobj-1.0.0.dist-info/LICENSE +674 -0
legend_pydataobj-1.0.0.dist-info/METADATA +63 -0
legend_pydataobj-1.0.0.dist-info/RECORD +26 -0
legend_pydataobj-1.0.0.dist-info/WHEEL +5 -0
legend_pydataobj-1.0.0.dist-info/top_level.txt +1 -0
lgdo/__init__.py +75 -0
lgdo/_version.py +4 -0
lgdo/compression/__init__.py +36 -0
lgdo/compression/base.py +29 -0
lgdo/compression/generic.py +77 -0
lgdo/compression/radware.py +579 -0
lgdo/compression/utils.py +34 -0
lgdo/compression/varlen.py +449 -0
lgdo/lgdo_utils.py +196 -0
lgdo/lh5_store.py +1711 -0
lgdo/types/__init__.py +30 -0
lgdo/types/array.py +140 -0
lgdo/types/arrayofequalsizedarrays.py +133 -0
lgdo/types/encoded.py +390 -0
lgdo/types/fixedsizearray.py +43 -0
lgdo/types/lgdo.py +51 -0
lgdo/types/scalar.py +59 -0
lgdo/types/struct.py +108 -0
lgdo/types/table.py +349 -0
lgdo/types/vectorofvectors.py +627 -0
lgdo/types/waveform_table.py +264 -0

lgdo/types/__init__.py ADDED Viewed

@@ -0,0 +1,30 @@
+"""LEGEND Data Objects (LGDO) types."""
+from .array import Array
+from .arrayofequalsizedarrays import ArrayOfEqualSizedArrays
+from .encoded import ArrayOfEncodedEqualSizedArrays, VectorOfEncodedVectors
+from .fixedsizearray import FixedSizeArray
+from .lgdo import LGDO
+from .scalar import Scalar
+from .struct import Struct
+from .table import Table
+from .vectorofvectors import VectorOfVectors
+from .waveform_table import WaveformTable
+__all__ = [  # public names re-exported by this package
+    "Array",
+    "ArrayOfEqualSizedArrays",
+    "ArrayOfEncodedEqualSizedArrays",
+    "FixedSizeArray",
+    "LGDO",
+    "Scalar",
+    "Struct",
+    "Table",
+    "VectorOfVectors",
+    "VectorOfEncodedVectors",
+    "WaveformTable",
+]
+import numpy as np
+np.set_printoptions(threshold=10)  # NOTE: import side effect, NumPy summarizes arrays of more than 10 elements when printing

lgdo/types/array.py ADDED Viewed

@@ -0,0 +1,140 @@
+"""
+Implements a LEGEND Data Object representing an n-dimensional array and
+corresponding utilities.
+"""
+from __future__ import annotations
+import logging
+from collections.abc import Iterator
+from typing import Any
+import numpy as np
+from .. import lgdo_utils as utils
+from .lgdo import LGDO
+log = logging.getLogger(__name__)  # module-level logger named after this module
+class Array(LGDO):
+    r"""Holds an :class:`numpy.ndarray` and attributes.
+    :class:`Array` (and the other various array types) holds an `nda` instead
+    of deriving from :class:`numpy.ndarray` for the following reasons:
+    - It keeps management of the `nda` totally under the control of the user. The
+      user can point it to another object's buffer, grab the `nda` and toss the
+      :class:`Array`, etc.
+    - It allows the management code to send just the `nda`'s the central routines
+      for data manpulation. Keeping LGDO's out of that code allows for more
+      standard, reusable, and (we expect) performant Python.
+    - It allows the first axis of the `nda` to be treated as "special" for storage
+      in :class:`.Table`\ s.
+    """
+    def __init__(
+        self,
+        nda: np.ndarray = None,
+        shape: tuple[int, ...] = (),
+        dtype: np.dtype = None,
+        fill_val: float | int = None,
+        attrs: dict[str, Any] = None,
+    ) -> None:
+        """
+        Parameters
+        ----------
+        nda
+            An :class:`numpy.ndarray` to be used for this object's internal
+            array. Note: the array is used directly, not copied. If not
+            supplied, internal memory is newly allocated based on the shape and
+            dtype arguments.
+        shape
+            A numpy-format shape specification for shape of the internal
+            ndarray. Required if `nda` is ``None``, otherwise unused.
+        dtype
+            Specifies the type of the data in the array. Required if `nda` is
+            ``None``, otherwise unused.
+        fill_val
+            If ``None``, memory is allocated without initialization. Otherwise,
+            the array is allocated with all elements set to the corresponding
+            fill value. If `nda` is not ``None``, this parameter is ignored.
+        attrs
+            A set of user attributes to be carried along with this LGDO.
+        """
+        if nda is None:
+            if fill_val is None:
+                nda = np.empty(shape, dtype=dtype)
+            elif fill_val == 0:
+                nda = np.zeros(shape, dtype=dtype)
+            else:
+                nda = np.full(shape, fill_val, dtype=dtype)
+        elif isinstance(nda, Array):
+            nda = nda.nda
+        elif not isinstance(nda, np.ndarray):
+            nda = np.array(nda)
+        self.nda = nda
+        self.dtype = self.nda.dtype
+        super().__init__(attrs)
+    def datatype_name(self) -> str:
+        return "array"
+    def form_datatype(self) -> str:
+        dt = self.datatype_name()
+        nd = str(len(self.nda.shape))
+        et = utils.get_element_type(self)
+        return dt + "<" + nd + ">{" + et + "}"
+    def __len__(self) -> int:
+        return len(self.nda)
+    def resize(self, new_size: int) -> None:
+        new_shape = (new_size,) + self.nda.shape[1:]
+        return self.nda.resize(new_shape, refcheck=True)
+    def append(self, value: np.ndarray) -> None:
+        self.resize(len(self) + 1)
+        self.nda[-1] = value
+    def insert(self, i: int, value: int | float) -> None:
+        self.nda = np.insert(self.nda, i, value)
+    def __getitem__(self, key):
+        return self.nda[key]
+    def __setitem__(self, key, value):
+        return self.nda.__setitem__(key, value)
+    def __eq__(self, other: Array) -> bool:
+        if isinstance(other, Array):
+            return self.attrs == other.attrs and np.array_equal(self.nda, other.nda)
+        else:
+            return False
+    def __iter__(self) -> Iterator:
+        yield from self.nda
+    def __str__(self) -> str:
+        attrs = self.getattrs()
+        string = str(self.nda)
+        if attrs:
+            string += f" with attrs={attrs}"
+        return string
+    def __repr__(self) -> str:
+        return (
+            self.__class__.__name__
+            + "("
+            + np.array2string(
+                self.nda,
+                prefix=self.__class__.__name__ + " ",
+                formatter={
+                    "int": lambda x: f"0x{x:02x}" if self.dtype == np.ubyte else str(x)
+                },
+            )
+            + f", attrs={repr(self.attrs)})"
+        )

lgdo/types/arrayofequalsizedarrays.py ADDED Viewed

@@ -0,0 +1,133 @@
+"""
+Implements a LEGEND Data Object representing an array of equal-sized arrays and
+corresponding utilities.
+"""
+from __future__ import annotations
+from collections.abc import Iterator
+from typing import Any
+import numpy as np
+from .. import lgdo_utils as utils
+from . import vectorofvectors as vov
+from .array import Array
+class ArrayOfEqualSizedArrays(Array):
+    """An array of equal-sized arrays.
+    Arrays of equal size within a file but could be different from application
+    to application. Canonical example: array of same-length waveforms.
+    """
+    def __init__(
+        self,
+        dims: tuple[int, ...] = None,
+        nda: np.ndarray = None,
+        shape: tuple[int, ...] = (),
+        dtype: np.dtype = None,
+        fill_val: int | float = None,
+        attrs: dict[str, Any] = None,
+    ) -> None:
+        """
+        Parameters
+        ----------
+        dims
+            specifies the dimensions required for building the
+            :class:`ArrayOfEqualSizedArrays`' `datatype` attribute.
+        nda
+            An :class:`numpy.ndarray` to be used for this object's internal
+            array. Note: the array is used directly, not copied. If not
+            supplied, internal memory is newly allocated based on the `shape`
+            and `dtype` arguments.
+        shape
+            A NumPy-format shape specification for shape of the internal
+            array. Required if `nda` is ``None``, otherwise unused.
+        dtype
+            Specifies the type of the data in the array. Required if `nda` is
+            ``None``, otherwise unused.
+        fill_val
+            If ``None``, memory is allocated without initialization. Otherwise,
+            the array is allocated with all elements set to the corresponding
+            fill value. If `nda` is not ``None``, this parameter is ignored.
+        attrs
+            A set of user attributes to be carried along with this LGDO.
+        Notes
+        -----
+        If shape is not "1D array of arrays of shape given by axes 1-N" (of
+        `nda`) then specify the dimensionality split in the constructor.
+        See Also
+        --------
+        :class:`.Array`
+        """
+        if dims is None:
+            # If no dims are provided, assume that it's a 1D Array of (N-1)-D Arrays
+            if nda is None:
+                s = shape
+            else:
+                s = nda.shape
+            self.dims = (1, len(s) - 1)
+        else:
+            self.dims = dims
+        super().__init__(
+            nda=nda, shape=shape, dtype=dtype, fill_val=fill_val, attrs=attrs
+        )
+    def datatype_name(self) -> str:
+        return "array_of_equalsized_arrays"
+    def form_datatype(self) -> str:
+        dt = self.datatype_name()
+        nd = str(len(self.nda.shape))
+        if self.dims is not None:
+            nd = ",".join([str(i) for i in self.dims])
+        et = utils.get_element_type(self)
+        return dt + "<" + nd + ">{" + et + "}"
+    def __len__(self) -> int:
+        return len(self.nda)
+    def __iter__(self) -> Iterator[np.array]:
+        return self.nda.__iter__()
+    def __next__(self) -> np.ndarray:
+        return self.nda.__next__()
+    def to_vov(self, cumulative_length: np.ndarray = None) -> vov.VectorOfVectors:
+        """Convert (and eventually resize) to :class:`.vectorofvectors.VectorOfVectors`.
+        Parameters
+        ----------
+        cumulative_length
+            cumulative length array of the output vector of vectors. Each
+            vector in the output is filled with values found in the
+            :class:`ArrayOfEqualSizedArrays`, starting from the first index. if
+            ``None``, use all of the original 2D array and make vectors of
+            equal size.
+        """
+        attrs = self.getattrs()
+        if cumulative_length is None:
+            return vov.VectorOfVectors(
+                flattened_data=self.nda.flatten(),
+                cumulative_length=(np.arange(self.nda.shape[0], dtype="uint32") + 1)
+                * self.nda.shape[1],
+                attrs=attrs,
+            )
+        if not isinstance(cumulative_length, np.ndarray):
+            cumulative_length = np.array(cumulative_length)
+        flattened_data = self.nda[
+            np.arange(self.nda.shape[1])
+            < np.diff(cumulative_length, prepend=0)[:, None]
+        ]
+        return vov.VectorOfVectors(
+            flattened_data=flattened_data,
+            cumulative_length=cumulative_length,
+            attrs=attrs,
+        )

lgdo/types/encoded.py ADDED Viewed

@@ -0,0 +1,390 @@
+from __future__ import annotations
+from collections.abc import Iterator
+from typing import Any
+import numpy as np
+from numpy.typing import NDArray
+from .. import lgdo_utils as utils
+from .array import Array
+from .lgdo import LGDO
+from .scalar import Scalar
+from .vectorofvectors import VectorOfVectors
+class VectorOfEncodedVectors(LGDO):
+    """An array of variable-length encoded arrays.
+    Used to represent an encoded :class:`.VectorOfVectors`. In addition to an
+    internal :class:`.VectorOfVectors` `self.encoded_data` storing the encoded
+    data, a 1D :class:`.Array` in `self.encoded_size` holds the original sizes
+    of the encoded vectors.
+    See Also
+    --------
+    .VectorOfVectors
+    """
+    def __init__(
+        self,
+        encoded_data: VectorOfVectors = None,
+        decoded_size: Array = None,
+        attrs: dict[str, Any] = None,
+    ) -> None:
+        """
+        Parameters
+        ----------
+        encoded_data
+            the vector of encoded vectors.
+        decoded_size
+            an array holding the original length of each encoded vector in
+            `encoded_data`.
+        attrs
+            A set of user attributes to be carried along with this LGDO. Should
+            include information about the codec used to encode the data.
+        """
+        if isinstance(encoded_data, VectorOfVectors):
+            self.encoded_data = encoded_data
+        elif encoded_data is None:
+            self.encoded_data = VectorOfVectors(dtype="ubyte")
+        else:
+            raise ValueError("encoded_data must be a valid VectorOfVectors")
+        if isinstance(decoded_size, Array):
+            self.decoded_size = decoded_size
+        elif decoded_size is not None:
+            self.decoded_size = Array(decoded_size)
+        elif encoded_data is not None:
+            self.decoded_size = Array(
+                shape=len(encoded_data), dtype="uint32", fill_val=0
+            )
+        elif decoded_size is None:
+            self.decoded_size = Array()
+        if len(self.encoded_data) != len(self.decoded_size):
+            raise RuntimeError("encoded_data vs. decoded_size shape mismatch")
+        super().__init__(attrs)
+    def datatype_name(self) -> str:
+        return "array"
+    def form_datatype(self) -> str:
+        et = utils.get_element_type(self.encoded_data)
+        return "array<1>{encoded_array<1>{" + et + "}}"
+    def __len__(self) -> int:
+        return len(self.encoded_data)
+    def __eq__(self, other: VectorOfEncodedVectors) -> bool:
+        if isinstance(other, VectorOfEncodedVectors):
+            return (
+                self.encoded_data == other.encoded_data
+                and self.decoded_size == other.decoded_size
+                and self.attrs == other.attrs
+            )
+        else:
+            return False
+    def resize(self, new_size: int) -> None:
+        """Resize vector along the first axis.
+        See Also
+        --------
+        .VectorOfVectors.resize
+        """
+        self.encoded_data.resize(new_size)
+        self.decoded_size.resize(new_size)
+    def append(self, value: tuple[NDArray, int]) -> None:
+        """Append a 1D encoded vector at the end.
+        Parameters
+        ----------
+        value
+            a tuple holding the encoded array and its decoded size.
+        See Also
+        --------
+        .VectorOfVectors.append
+        """
+        self.encoded_data.append(value[0])
+        self.decoded_size.append(value[1])
+    def insert(self, i: int, value: tuple[NDArray, int]) -> None:
+        """Insert an encoded vector at index `i`.
+        Parameters
+        ----------
+        i
+            the new vector will be inserted before this index.
+        value
+            a tuple holding the encoded array and its decoded size.
+        See Also
+        --------
+        .VectorOfVectors.insert
+        """
+        self.encoded_data.insert(i, value[0])
+        self.decoded_size.insert(i, value[1])
+    def replace(self, i: int, value: tuple[NDArray, int]) -> None:
+        """Replace the encoded vector (and decoded size) at index `i` with a new one.
+        Parameters
+        ----------
+        i
+            index of the vector to be replaced.
+        value
+            a tuple holding the encoded array and its decoded size.
+        See Also
+        --------
+        .VectorOfVectors.replace
+        """
+        self.encoded_data.replace(i, value[0])
+        self.decoded_size[i] = value[1]
+    def __setitem__(self, i: int, value: tuple[NDArray, int]) -> None:
+        """Set an encoded vector at index `i`.
+        Parameters
+        ----------
+        i
+            the new vector will be set at this index.
+        value
+            a tuple holding the encoded array and its decoded size.
+        """
+        self.encoded_data[i] = value[0]
+        self.decoded_size[i] = value[1]
+    def __getitem__(self, i: int) -> tuple[NDArray, int]:
+        """Return vector at index `i`.
+        Returns
+        -------
+        (encoded_data, decoded_size)
+            the encoded array and its decoded length.
+        """
+        return (self.encoded_data[i], self.decoded_size[i])
+    def __iter__(self) -> Iterator[tuple[NDArray, int]]:
+        yield from zip(self.encoded_data, self.decoded_size)
+    def __str__(self) -> str:
+        string = ""
+        pos = 0
+        for vec, size in self:
+            if pos != 0:
+                string += " "
+            string += (
+                np.array2string(
+                    vec,
+                    prefix=" ",
+                    formatter={
+                        "int": lambda x, vec=vec: f"0x{x:02x}"
+                        if vec.dtype == np.ubyte
+                        else str(x)
+                    },
+                )
+                + f" decoded_size = {size}"
+            )
+            if pos < len(self.encoded_data.cumulative_length):
+                string += ",\n"
+            pos += 1
+        string = f"[{string}]"
+        attrs = self.getattrs()
+        if len(attrs) > 0:
+            string += f" with attrs={attrs}"
+        return string
+    def __repr__(self) -> str:
+        npopt = np.get_printoptions()
+        np.set_printoptions(
+            threshold=5,
+            edgeitems=2,
+            linewidth=100,
+        )
+        out = (
+            "VectorOfEncodedVectors(encoded_data="
+            + repr(self.encoded_data)
+            + ", decoded_size="
+            + repr(self.decoded_size)
+            + ", attrs="
+            + repr(self.attrs)
+            + ")"
+        )
+        np.set_printoptions(**npopt)
+        return out
+class ArrayOfEncodedEqualSizedArrays(LGDO):
+    """An array of encoded arrays with equal decoded size.
+    Used to represent an encoded :class:`.ArrayOfEqualSizedArrays`. In addition
+    to an internal :class:`.VectorOfVectors` `self.encoded_data` storing the
+    encoded data, the size of the decoded arrays is stored in a
+    :class:`.Scalar` `self.encoded_size`.
+    See Also
+    --------
+    .ArrayOfEqualSizedArrays
+    """
+    def __init__(
+        self,
+        encoded_data: VectorOfVectors = None,
+        decoded_size: Scalar | int = None,
+        attrs: dict[str, Any] = None,
+    ) -> None:
+        """
+        Parameters
+        ----------
+        encoded_data
+            the vector of vectors holding the encoded data.
+        decoded_size
+            the length of the decoded arrays.
+        attrs
+            A set of user attributes to be carried along with this LGDO. Should
+            include information about the codec used to encode the data.
+        """
+        if isinstance(encoded_data, VectorOfVectors):
+            self.encoded_data = encoded_data
+        elif encoded_data is None:
+            self.encoded_data = VectorOfVectors(dtype="ubyte")
+        else:
+            raise ValueError("encoded_data must be a valid VectorOfVectors")
+        if isinstance(decoded_size, Scalar):
+            self.decoded_size = decoded_size
+        elif decoded_size is not None:
+            self.decoded_size = Scalar(int(decoded_size))
+        else:
+            self.decoded_size = Scalar(0)
+        super().__init__(attrs)
+    def datatype_name(self) -> str:
+        return "array"
+    def form_datatype(self) -> str:
+        et = utils.get_element_type(self.encoded_data)
+        return "array_of_encoded_equalsized_arrays<1,1>{" + et + "}"
+    def __len__(self) -> int:
+        return len(self.encoded_data)
+    def __eq__(self, other: ArrayOfEncodedEqualSizedArrays) -> bool:
+        if isinstance(other, ArrayOfEncodedEqualSizedArrays):
+            return (
+                self.encoded_data == other.encoded_data
+                and self.decoded_size == other.decoded_size
+                and self.attrs == other.attrs
+            )
+        else:
+            return False
+    def resize(self, new_size: int) -> None:
+        """Resize array along the first axis.
+        See Also
+        --------
+        .VectorOfVectors.resize
+        """
+        self.encoded_data.resize(new_size)
+    def append(self, value: NDArray) -> None:
+        """Append a 1D encoded array at the end.
+        See Also
+        --------
+        .VectorOfVectors.append
+        """
+        self.encoded_data.append(value)
+    def insert(self, i: int, value: NDArray) -> None:
+        """Insert an encoded array at index `i`.
+        See Also
+        --------
+        .VectorOfVectors.insert
+        """
+        self.encoded_data.insert(i, value)
+    def replace(self, i: int, value: NDArray) -> None:
+        """Replace the encoded array at index `i` with a new one.
+        See Also
+        --------
+        .VectorOfVectors.replace
+        """
+        self.encoded_data.replace(i, value)
+    def __setitem__(self, i: int, value: NDArray) -> None:
+        """Set an encoded array at index `i`."""
+        self.encoded_data[i] = value
+    def __getitem__(self, i: int) -> NDArray:
+        """Return encoded array at index `i`."""
+        return self.encoded_data[i]
+    def __iter__(self) -> Iterator[NDArray]:
+        yield from self.encoded_data
+    def __str__(self) -> str:
+        string = ""
+        pos = 0
+        for vec in self:
+            if pos != 0:
+                string += " "
+            string += np.array2string(
+                vec,
+                prefix=" ",
+                formatter={
+                    "int": lambda x, vec=vec: f"0x{x:02x}"
+                    if vec.dtype == np.ubyte
+                    else str(x)
+                },
+            )
+            if pos < len(self.encoded_data.cumulative_length):
+                string += ",\n"
+            pos += 1
+        string = f"[{string}] decoded_size={self.decoded_size}"
+        attrs = self.getattrs()
+        if len(attrs) > 0:
+            string += f" with attrs={attrs}"
+        return string
+    def __repr__(self) -> str:
+        npopt = np.get_printoptions()
+        np.set_printoptions(
+            threshold=5,
+            edgeitems=2,
+            linewidth=100,
+        )
+        out = (
+            "ArrayOfEncodedEqualSizedArrays(encoded_data="
+            + repr(self.encoded_data)
+            + ", decoded_size="
+            + repr(self.decoded_size)
+            + ", attrs="
+            + repr(self.attrs)
+            + ")"
+        )
+        np.set_printoptions(**npopt)
+        return out