PyPI - legend-pydataobj - Versions diffs - 1.11.8__py3-none-any.whl → 1.11.10__py3-none-any.whl - Mend

legend-pydataobj 1.11.8__py3-none-any.whl → 1.11.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

{legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/METADATA +1 -1
{legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/RECORD +26 -25
lgdo/__init__.py +4 -5
lgdo/_version.py +2 -2
lgdo/lh5/__init__.py +3 -1
lgdo/lh5/_serializers/read/composite.py +3 -1
lgdo/lh5/_serializers/write/composite.py +28 -11
lgdo/lh5/concat.py +9 -3
lgdo/lh5/core.py +31 -26
lgdo/lh5/iterator.py +27 -48
lgdo/lh5/store.py +75 -22
lgdo/lh5/tools.py +111 -0
lgdo/lh5/utils.py +4 -6
lgdo/lh5_store.py +284 -0
lgdo/types/array.py +15 -84
lgdo/types/encoded.py +20 -25
lgdo/types/histogram.py +1 -1
lgdo/types/lgdo.py +0 -50
lgdo/types/table.py +28 -49
lgdo/types/vectorofvectors.py +94 -132
lgdo/types/vovutils.py +4 -14
lgdo/types/waveformtable.py +21 -19
{legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/WHEEL +0 -0
{legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/entry_points.txt +0 -0
{legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/licenses/LICENSE +0 -0
{legend_pydataobj-1.11.8.dist-info → legend_pydataobj-1.11.10.dist-info}/top_level.txt +0 -0

lgdo/types/array.py CHANGED Viewed

@@ -6,7 +6,7 @@ corresponding utilities.
 from __future__ import annotations
 import logging
-from collections.abc import Collection, Iterator
+from collections.abc import Iterator
 from typing import Any
 import awkward as ak
@@ -17,12 +17,12 @@ import pint_pandas  # noqa: F401
 from .. import utils
 from ..units import default_units_registry as u
-from .lgdo import LGDOCollection
+from .lgdo import LGDO
 log = logging.getLogger(__name__)
-class Array(LGDOCollection):
+class Array(LGDO):
     r"""Holds an :class:`numpy.ndarray` and attributes.
     :class:`Array` (and the other various array types) holds an `nda` instead
@@ -78,7 +78,11 @@ class Array(LGDOCollection):
         elif isinstance(nda, Array):
             nda = nda.nda
+        elif not isinstance(nda, np.ndarray):
+            nda = np.array(nda)
         self.nda = nda
+        self.dtype = self.nda.dtype
         super().__init__(attrs)
@@ -92,91 +96,18 @@ class Array(LGDOCollection):
         return dt + "<" + nd + ">{" + et + "}"
     def __len__(self) -> int:
-        return self._size
-    @property
-    def nda(self):
-        return self._nda[: self._size, ...] if self._nda.shape != () else self._nda
-    @nda.setter
-    def nda(self, value):
-        self._nda = value if isinstance(value, np.ndarray) else np.array(value)
-        self._size = len(self._nda) if self._nda.shape != () else 0
-    @property
-    def dtype(self):
-        return self._nda.dtype
-    @property
-    def shape(self):
-        return (len(self),) + self._nda.shape[1:]
-    def reserve_capacity(self, capacity: int) -> None:
-        "Set size (number of rows) of internal memory buffer"
-        if capacity < len(self):
-            msg = "Cannot reduce capacity below Array length"
-            raise ValueError(msg)
-        self._nda.resize((capacity,) + self._nda.shape[1:], refcheck=False)
-    def get_capacity(self) -> int:
-        "Get capacity (i.e. max size before memory must be re-allocated)"
-        return len(self._nda)
-    def trim_capacity(self) -> None:
-        "Set capacity to be minimum needed to support Array size"
-        self.reserve_capacity(np.prod(self.shape))
-    def resize(self, new_size: int | Collection[int], trim=False) -> None:
-        """Set size of Array in rows. Only change capacity if it must be
-        increased to accommodate new rows; in this case double capacity.
-        If trim is True, capacity will be set to match size. If new_size
-        is an int, do not change size of inner dimensions.
-        If new_size is a collection, internal memory will be re-allocated, so
-        this should be done only rarely!"""
-        if isinstance(new_size, Collection):
-            self._size = new_size[0]
-            self._nda.resize(new_size)
-        else:
-            self._size = new_size
-            if trim and new_size != self.get_capacity:
-                self.reserve_capacity(new_size)
-            # If capacity is not big enough, set to next power of 2 big enough
-            if new_size > self.get_capacity():
-                self.reserve_capacity(int(2 ** (np.ceil(np.log2(new_size)))))
+        return len(self.nda)
+    def resize(self, new_size: int) -> None:
+        new_shape = (new_size,) + self.nda.shape[1:]
+        return self.nda.resize(new_shape, refcheck=True)
     def append(self, value: np.ndarray) -> None:
-        "Append value to end of array (with copy)"
-        self.insert(len(self), value)
+        self.resize(len(self) + 1)
+        self.nda[-1] = value
     def insert(self, i: int, value: int | float) -> None:
-        "Insert value into row i (with copy)"
-        if i > len(self):
-            msg = f"index {i} is out of bounds for array with size {len(self)}"
-            raise IndexError(msg)
-        value = np.array(value)
-        if value.shape == self.shape[1:]:
-            self.resize(len(self) + 1)
-            self[i + 1 :] = self[i:-1]
-            self[i] = value
-        elif value.shape[1:] == self.shape[1:]:
-            self.resize(len(self) + len(value))
-            self[i + len(value) :] = self[i : -len(value)]
-            self[i : i + len(value)] = value
-        else:
-            msg = f"Could not insert value with shape {value.shape} into Array with shape {self.shape}"
-            raise ValueError(msg)
-    def replace(self, i: int, value: int | float) -> None:
-        "Replace value at row i"
-        if i >= len(self):
-            msg = f"index {i} is out of bounds for array with size {len(self)}"
-            raise IndexError(msg)
-        self[i] = value
+        self.nda = np.insert(self.nda, i, value)
     def __getitem__(self, key):
         return self.nda[key]

lgdo/types/encoded.py CHANGED Viewed

@@ -11,12 +11,12 @@ from numpy.typing import NDArray
 from .. import utils
 from .array import Array
-from .lgdo import LGDOCollection
+from .lgdo import LGDO
 from .scalar import Scalar
 from .vectorofvectors import VectorOfVectors
-class VectorOfEncodedVectors(LGDOCollection):
+class VectorOfEncodedVectors(LGDO):
     """An array of variable-length encoded arrays.
     Used to represent an encoded :class:`.VectorOfVectors`. In addition to an
@@ -92,17 +92,6 @@ class VectorOfEncodedVectors(LGDOCollection):
         return False
-    def reserve_capacity(self, *capacity: int) -> None:
-        self.encoded_data.reserve_capacity(*capacity)
-        self.decoded_size.reserve_capacity(capacity[0])
-    def get_capacity(self) -> tuple:
-        return (self.decoded_size.get_capacity, *self.encoded_data.get_capacity())
-    def trim_capacity(self) -> None:
-        self.encoded_data.trim_capacity()
-        self.decoded_size.trim_capacity()
     def resize(self, new_size: int) -> None:
         """Resize vector along the first axis.
@@ -113,6 +102,21 @@ class VectorOfEncodedVectors(LGDOCollection):
         self.encoded_data.resize(new_size)
         self.decoded_size.resize(new_size)
+    def append(self, value: tuple[NDArray, int]) -> None:
+        """Append a 1D encoded vector at the end.
+        Parameters
+        ----------
+        value
+            a tuple holding the encoded array and its decoded size.
+        See Also
+        --------
+        .VectorOfVectors.append
+        """
+        self.encoded_data.append(value[0])
+        self.decoded_size.append(value[1])
     def insert(self, i: int, value: tuple[NDArray, int]) -> None:
         """Insert an encoded vector at index `i`.
@@ -278,7 +282,7 @@ class VectorOfEncodedVectors(LGDOCollection):
         raise ValueError(msg)
-class ArrayOfEncodedEqualSizedArrays(LGDOCollection):
+class ArrayOfEncodedEqualSizedArrays(LGDO):
     """An array of encoded arrays with equal decoded size.
     Used to represent an encoded :class:`.ArrayOfEqualSizedArrays`. In addition
@@ -345,23 +349,14 @@ class ArrayOfEncodedEqualSizedArrays(LGDOCollection):
         return False
-    def reserve_capacity(self, *capacity: int) -> None:
-        self.encoded_data.reserve_capacity(capacity)
-    def get_capacity(self) -> tuple:
-        return self.encoded_data.get_capacity()
-    def trim_capacity(self) -> None:
-        self.encoded_data.trim_capacity()
-    def resize(self, new_size: int, trim: bool = False) -> None:
+    def resize(self, new_size: int) -> None:
         """Resize array along the first axis.
         See Also
         --------
         .VectorOfVectors.resize
         """
-        self.encoded_data.resize(new_size, trim)
+        self.encoded_data.resize(new_size)
     def append(self, value: NDArray) -> None:
         """Append a 1D encoded array at the end.

lgdo/types/histogram.py CHANGED Viewed

@@ -424,7 +424,7 @@ class Histogram(Struct):
             dict.__setitem__(self, name, obj)
         else:
             msg = "histogram fields cannot be mutated "
-            raise AttributeError(msg)
+            raise TypeError(msg)
     def __getattr__(self, name: str) -> None:
         # do not allow for new attributes on this

lgdo/types/lgdo.py CHANGED Viewed

@@ -92,53 +92,3 @@ class LGDO(ABC):
     def __repr__(self) -> str:
         return self.__class__.__name__ + f"(attrs={self.attrs!r})"
-class LGDOCollection(LGDO):
-    """Abstract base class representing a LEGEND Collection Object (LGDO).
-    This defines the interface for classes used as table columns.
-    """
-    @abstractmethod
-    def __init__(self, attrs: dict[str, Any] | None = None) -> None:
-        super().__init__(attrs)
-    @abstractmethod
-    def __len__(self) -> int:
-        """Provides ``__len__`` for this array-like class."""
-    @abstractmethod
-    def reserve_capacity(self, capacity: int) -> None:
-        """Reserve capacity (in rows) for later use. Internal memory buffers
-        will have enough entries to store this many rows.
-        """
-    @abstractmethod
-    def get_capacity(self) -> int:
-        "get reserved capacity of internal memory buffers in rows"
-    @abstractmethod
-    def trim_capacity(self) -> None:
-        """set capacity to only what is required to store current contents
-        of LGDOCollection
-        """
-    @abstractmethod
-    def resize(self, new_size: int, trim: bool = False) -> None:
-        """Return this LGDO's datatype attribute string."""
-    def append(self, val) -> None:
-        "append val to end of LGDOCollection"
-        self.insert(len(self), val)
-    @abstractmethod
-    def insert(self, i: int, val) -> None:
-        "insert val into LGDOCollection at position i"
-    @abstractmethod
-    def replace(self, i: int, val) -> None:
-        "replace item at position i with val in LGDOCollection"
-    def clear(self, trim: bool = False) -> None:
-        "set size of LGDOCollection to zero"
-        self.resize(0, trim=trim)

lgdo/types/table.py CHANGED Viewed

@@ -19,7 +19,7 @@ from pandas.io.formats import format as fmt
 from .array import Array
 from .arrayofequalsizedarrays import ArrayOfEqualSizedArrays
-from .lgdo import LGDO, LGDOCollection
+from .lgdo import LGDO
 from .scalar import Scalar
 from .struct import Struct
 from .vectorofvectors import VectorOfVectors
@@ -27,9 +27,13 @@ from .vectorofvectors import VectorOfVectors
 log = logging.getLogger(__name__)
-class Table(Struct, LGDOCollection):
+class Table(Struct):
     """A special struct of arrays or subtable columns of equal length.
+    Holds onto an internal read/write location ``loc`` that is useful in
+    managing table I/O using functions like :meth:`push_row`, :meth:`is_full`,
+    and :meth:`clear`.
     Note
     ----
     If you write to a table and don't fill it up to its total size, be sure to
@@ -45,7 +49,7 @@ class Table(Struct, LGDOCollection):
     def __init__(
         self,
-        col_dict: Mapping[str, LGDOCollection] | pd.DataFrame | ak.Array | None = None,
+        col_dict: Mapping[str, LGDO] | pd.DataFrame | ak.Array | None = None,
         size: int | None = None,
         attrs: Mapping[str, Any] | None = None,
     ) -> None:
@@ -61,7 +65,7 @@ class Table(Struct, LGDOCollection):
         col_dict
             instantiate this table using the supplied mapping of column names
             and array-like objects. Supported input types are: mapping of
-            strings to LGDOCollections, :class:`pd.DataFrame` and :class:`ak.Array`.
+            strings to LGDOs, :class:`pd.DataFrame` and :class:`ak.Array`.
             Note 1: no copy is performed, the objects are used directly (unless
             :class:`ak.Array` is provided).  Note 2: if `size` is not ``None``,
             all arrays will be resized to match it.  Note 3: if the arrays have
@@ -81,8 +85,7 @@ class Table(Struct, LGDOCollection):
             col_dict = _ak_to_lgdo_or_col_dict(col_dict)
         # call Struct constructor
-        Struct.__init__(self, obj_dict=col_dict)
-        LGDOCollection.__init__(self, attrs=attrs)
+        super().__init__(obj_dict=col_dict, attrs=attrs)
         # if col_dict is not empty, set size according to it
         # if size is also supplied, resize all fields to match it
@@ -90,10 +93,13 @@ class Table(Struct, LGDOCollection):
         if col_dict is not None and len(col_dict) > 0:
             self.resize(new_size=size, do_warn=(size is None))
-        # if no col_dict, just set the size
+        # if no col_dict, just set the size (default to 1024)
         else:
             self.size = size if size is not None else None
+        # always start at loc=0
+        self.loc = 0
     def datatype_name(self) -> str:
         return "table"
@@ -101,31 +107,7 @@ class Table(Struct, LGDOCollection):
         """Provides ``__len__`` for this array-like class."""
         return self.size
-    def reserve_capacity(self, capacity: int | list) -> None:
-        "Set size (number of rows) of internal memory buffer"
-        if isinstance(capacity, int):
-            for obj in self.values():
-                obj.reserve_capacity(capacity)
-        else:
-            if len(capacity) != len(self.keys()):
-                msg = "List of capacities must have same length as number of keys"
-                raise ValueError(msg)
-            for obj, cap in zip(self.values(), capacity):
-                obj.reserve_capacity(cap)
-    def get_capacity(self) -> int:
-        "Get list of capacities for each key"
-        return [v.get_capacity() for v in self.values()]
-    def trim_capacity(self) -> int:
-        "Set capacity to be minimum needed to support Array size"
-        for v in self.values():
-            v.trim_capacity()
-    def resize(
-        self, new_size: int | None = None, do_warn: bool = False, trim: bool = False
-    ) -> None:
+    def resize(self, new_size: int | None = None, do_warn: bool = False) -> None:
         # if new_size = None, use the size from the first field
         for field, obj in self.items():
             if new_size is None:
@@ -137,20 +119,21 @@ class Table(Struct, LGDOCollection):
                         f"with size {len(obj)} != {new_size}"
                     )
                 if isinstance(obj, Table):
-                    obj.resize(new_size, trim)
+                    obj.resize(new_size)
                 else:
-                    obj.resize(new_size, trim)
+                    obj.resize(new_size)
         self.size = new_size
-    def insert(self, i: int, vals: dict) -> None:
-        "Insert vals into table at row i. Vals is a mapping from table key to val"
-        for k, ar in self.items():
-            ar.insert(i, vals[k])
-        self.size += 1
+    def push_row(self) -> None:
+        self.loc += 1
-    def add_field(
-        self, name: str, obj: LGDOCollection, use_obj_size: bool = False
-    ) -> None:
+    def is_full(self) -> bool:
+        return self.loc >= self.size
+    def clear(self) -> None:
+        self.loc = 0
+    def add_field(self, name: str, obj: LGDO, use_obj_size: bool = False) -> None:
         """Add a field (column) to the table.
         Use the name "field" here to match the terminology used in
@@ -187,9 +170,7 @@ class Table(Struct, LGDOCollection):
             new_size = len(obj) if use_obj_size else self.size
             self.resize(new_size=new_size)
-    def add_column(
-        self, name: str, obj: LGDOCollection, use_obj_size: bool = False
-    ) -> None:
+    def add_column(self, name: str, obj: LGDO, use_obj_size: bool = False) -> None:
         """Alias for :meth:`.add_field` using table terminology 'column'."""
         self.add_field(name, obj, use_obj_size=use_obj_size)
@@ -220,10 +201,8 @@ class Table(Struct, LGDOCollection):
             set to ``False`` to turn off warnings associated with mismatched
             `loc` parameter or :meth:`add_column` warnings.
         """
-        if len(other_table) != len(self) and do_warn:
-            log.warning(
-                f"len(other_table) ({len(other_table)}) != len(self) ({len(self)})"
-            )
+        if other_table.loc != self.loc and do_warn:
+            log.warning(f"other_table.loc ({other_table.loc}) != self.loc({self.loc})")
         if cols is None:
             cols = other_table.keys()
         for name in cols:

legend-pydataobj 1.11.8__py3-none-any.whl → 1.11.10__py3-none-any.whl

legend-pydataobj 1.11.8__py3-none-any.whl → 1.11.10__py3-none-any.whl