legend-pydataobj 1.5.1__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {legend_pydataobj-1.5.1.dist-info → legend_pydataobj-1.6.0.dist-info}/METADATA +1 -1
  2. legend_pydataobj-1.6.0.dist-info/RECORD +54 -0
  3. {legend_pydataobj-1.5.1.dist-info → legend_pydataobj-1.6.0.dist-info}/WHEEL +1 -1
  4. {legend_pydataobj-1.5.1.dist-info → legend_pydataobj-1.6.0.dist-info}/entry_points.txt +1 -0
  5. lgdo/__init__.py +7 -4
  6. lgdo/_version.py +2 -2
  7. lgdo/cli.py +237 -12
  8. lgdo/compression/__init__.py +1 -0
  9. lgdo/lh5/__init__.py +9 -1
  10. lgdo/lh5/_serializers/__init__.py +43 -0
  11. lgdo/lh5/_serializers/read/__init__.py +0 -0
  12. lgdo/lh5/_serializers/read/array.py +34 -0
  13. lgdo/lh5/_serializers/read/composite.py +405 -0
  14. lgdo/lh5/_serializers/read/encoded.py +129 -0
  15. lgdo/lh5/_serializers/read/ndarray.py +104 -0
  16. lgdo/lh5/_serializers/read/scalar.py +34 -0
  17. lgdo/lh5/_serializers/read/utils.py +12 -0
  18. lgdo/lh5/_serializers/read/vector_of_vectors.py +195 -0
  19. lgdo/lh5/_serializers/write/__init__.py +0 -0
  20. lgdo/lh5/_serializers/write/array.py +92 -0
  21. lgdo/lh5/_serializers/write/composite.py +259 -0
  22. lgdo/lh5/_serializers/write/scalar.py +23 -0
  23. lgdo/lh5/_serializers/write/vector_of_vectors.py +95 -0
  24. lgdo/lh5/core.py +272 -0
  25. lgdo/lh5/datatype.py +46 -0
  26. lgdo/lh5/exceptions.py +34 -0
  27. lgdo/lh5/iterator.py +1 -1
  28. lgdo/lh5/store.py +69 -1160
  29. lgdo/lh5/tools.py +27 -53
  30. lgdo/lh5/utils.py +130 -27
  31. lgdo/lh5_store.py +11 -2
  32. lgdo/logging.py +1 -0
  33. lgdo/types/__init__.py +1 -0
  34. lgdo/types/array.py +1 -0
  35. lgdo/types/arrayofequalsizedarrays.py +1 -0
  36. lgdo/types/encoded.py +3 -8
  37. lgdo/types/fixedsizearray.py +1 -0
  38. lgdo/types/struct.py +1 -0
  39. lgdo/types/table.py +37 -5
  40. lgdo/types/vectorofvectors.py +314 -458
  41. lgdo/types/vovutils.py +320 -0
  42. lgdo/types/waveformtable.py +1 -0
  43. lgdo/utils.py +1 -32
  44. legend_pydataobj-1.5.1.dist-info/RECORD +0 -36
  45. {legend_pydataobj-1.5.1.dist-info → legend_pydataobj-1.6.0.dist-info}/LICENSE +0 -0
  46. {legend_pydataobj-1.5.1.dist-info → legend_pydataobj-1.6.0.dist-info}/top_level.txt +0 -0
lgdo/types/vovutils.py ADDED
@@ -0,0 +1,320 @@
+ """:class:`~.lgdo.types.vectorofvectors.VectorOfVectors` utilities."""
+
+ from __future__ import annotations
+
+ import logging
+ from collections.abc import Sequence
+
+ import awkward as ak
+ import numba
+ import numpy as np
+ from numpy.typing import NDArray
+
+ from ..utils import numba_defaults_kwargs as nb_kwargs
+ from .array import Array
+
+ log = logging.getLogger(__name__)
+
+
+ def build_cl(
+     sorted_array_in: NDArray, cumulative_length_out: NDArray | None = None
+ ) -> NDArray:
+     """Build a cumulative length array from an array of sorted data.
+
+     Examples
+     --------
+     >>> build_cl(np.array([3, 3, 3, 4]))
+     array([3., 4.])
+
+     For a `sorted_array_in` of indices, this is the inverse of
+     :func:`.explode_cl`, in the sense that doing
+     ``build_cl(explode_cl(cumulative_length))`` would recover the original
+     `cumulative_length`.
+
+     Parameters
+     ----------
+     sorted_array_in
+         array of data already sorted; each N matching contiguous entries will
+         be converted into a new row of `cumulative_length_out`.
+     cumulative_length_out
+         a pre-allocated array for the output `cumulative_length`. It will
+         always have length <= `sorted_array_in`, so giving them the same length
+         is safe if there is not a better guess.
+
+     Returns
+     -------
+     cumulative_length_out
+         the output cumulative length array. If the user provides a
+         `cumulative_length_out` that is too long, this return value is sliced
+         to contain only the used portion of the allocated memory.
+     """
+     if len(sorted_array_in) == 0:
+         return None
+     sorted_array_in = np.asarray(sorted_array_in)
+     if cumulative_length_out is None:
+         cumulative_length_out = np.zeros(len(sorted_array_in), dtype=np.uint64)
+     else:
+         cumulative_length_out.fill(0)
+     if len(cumulative_length_out) == 0 and len(sorted_array_in) > 0:
+         msg = f"cumulative_length_out too short ({len(cumulative_length_out)})"
+         raise ValueError(msg)
+     return _nb_build_cl(sorted_array_in, cumulative_length_out)
+
+
+ @numba.njit(**nb_kwargs)
+ def _nb_build_cl(sorted_array_in: NDArray, cumulative_length_out: NDArray) -> NDArray:
+     """numbified inner loop for build_cl"""
+     ii = 0
+     last_val = sorted_array_in[0]
+     for val in sorted_array_in:
+         if val != last_val:
+             ii += 1
+             cumulative_length_out[ii] = cumulative_length_out[ii - 1]
+             if ii >= len(cumulative_length_out):
+                 msg = "cumulative_length_out too short"
+                 raise RuntimeError(msg)
+             last_val = val
+         cumulative_length_out[ii] += 1
+     ii += 1
+     return cumulative_length_out[:ii]
+
+
+ @numba.guvectorize(
+     [
+         f"{data_type}[:,:],{size_type}[:],{data_type}[:]"
+         for data_type in [
+             "b1",
+             "i1",
+             "i2",
+             "i4",
+             "i8",
+             "u1",
+             "u2",
+             "u4",
+             "u8",
+             "f4",
+             "f8",
+             "c8",
+             "c16",
+         ]
+         for size_type in ["i4", "i8", "u4", "u8"]
+     ],
+     "(l,m),(l),(n)",
+     **nb_kwargs,
+ )
+ def _nb_fill(aoa_in: NDArray, len_in: NDArray, flattened_array_out: NDArray):
+     """Vectorized function to fill flattened array from array of arrays and
+     lengths. Values in aoa_in past lengths will not be copied.
+
+     Parameters
+     ----------
+     aoa_in
+         array of arrays containing values to be copied
+     len_in
+         array of vector lengths for each row of aoa_in
+     flattened_array_out
+         flattened array to copy values into. Must be longer than sum of
+         lengths in len_in
+     """
+
+     if len(flattened_array_out) < len_in.sum():
+         msg = "flattened array not large enough to hold values"
+         raise ValueError(msg)
+
+     start = 0
+     for i, ll in enumerate(len_in):
+         stop = start + ll
+         flattened_array_out[start:stop] = aoa_in[i, :ll]
+         start = stop
+
+
+ def explode_cl(cumulative_length: NDArray, array_out: NDArray | None = None) -> NDArray:
+     """Explode a `cumulative_length` array.
+
+     Examples
+     --------
+     >>> explode_cl(np.array([2, 3]))
+     array([0., 0., 1.])
+
+     This is the inverse of :func:`.build_cl`, in the sense that doing
+     ``build_cl(explode_cl(cumulative_length))`` would recover the original
+     `cumulative_length`.
+
+     Parameters
+     ----------
+     cumulative_length
+         the cumulative length array to be exploded.
+     array_out
+         a pre-allocated array to hold the exploded cumulative length array.
+         The length should be equal to ``cumulative_length[-1]``.
+
+     Returns
+     -------
+     array_out
+         the exploded cumulative length array.
+     """
+     cumulative_length = np.asarray(cumulative_length)
+     out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0
+     if array_out is None:
+         array_out = np.empty(int(out_len), dtype=np.uint64)
+     if len(array_out) != out_len:
+         msg = f"bad lengths: cl[-1] ({cumulative_length[-1]}) != out ({len(array_out)})"
+         raise ValueError(msg)
+     return _nb_explode_cl(cumulative_length, array_out)
+
+
+ @numba.njit(**nb_kwargs)
+ def _nb_explode_cl(cumulative_length: NDArray, array_out: NDArray) -> NDArray:
+     """numbified inner loop for explode_cl"""
+     out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0
+     if len(array_out) != out_len:
+         msg = "bad lengths"
+         raise ValueError(msg)
+     start = 0
+     for ii in range(len(cumulative_length)):
+         nn = int(cumulative_length[ii] - start)
+         for jj in range(nn):
+             array_out[int(start + jj)] = ii
+         start = cumulative_length[ii]
+     return array_out
+
+
+ def explode(
+     cumulative_length: NDArray, array_in: NDArray, array_out: NDArray | None = None
+ ) -> NDArray:
+     """Explode a data array using a `cumulative_length` array.
+
+     This is identical to :func:`.explode_cl`, except `array_in` gets exploded
+     instead of `cumulative_length`.
+
+     Examples
+     --------
+     >>> explode(np.array([2, 3]), np.array([3, 4]))
+     array([3., 3., 4.])
+
+     Parameters
+     ----------
+     cumulative_length
+         the cumulative length array to use for exploding.
+     array_in
+         the data to be exploded. Must have same length as `cumulative_length`.
+     array_out
+         a pre-allocated array to hold the exploded data. The length should be
+         equal to ``cumulative_length[-1]``.
+
+     Returns
+     -------
+     array_out
+         the exploded data array.
+     """
+     cumulative_length = np.asarray(cumulative_length)
+     array_in = np.asarray(array_in)
+     out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0
+     if array_out is None:
+         array_out = np.empty(out_len, dtype=array_in.dtype)
+     if len(cumulative_length) != len(array_in) or len(array_out) != out_len:
+         msg = (
+             f"bad lengths: cl ({len(cumulative_length)}) != in ({len(array_in)}) "
+             f"and cl[-1] ({cumulative_length[-1]}) != out ({len(array_out)})"
+         )
+         raise ValueError(msg)
+     return _nb_explode(cumulative_length, array_in, array_out)
+
+
+ @numba.njit(**nb_kwargs)
+ def _nb_explode(
+     cumulative_length: NDArray, array_in: NDArray, array_out: NDArray
+ ) -> NDArray:
+     """Numbified inner loop for :func:`.explode`."""
+     out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0
+     if len(cumulative_length) != len(array_in) or len(array_out) != out_len:
+         msg = "bad lengths"
+         raise ValueError(msg)
+     ii = 0
+     for jj in range(len(array_out)):
+         while ii < len(cumulative_length) and jj >= cumulative_length[ii]:
+             ii += 1
+         array_out[jj] = array_in[ii]
+     return array_out
+
+
+ def explode_arrays(
+     cumulative_length: Array,
+     arrays: Sequence[NDArray],
+     arrays_out: Sequence[NDArray] | None = None,
+ ) -> list:
+     """Explode a set of arrays using a `cumulative_length` array.
+
+     Parameters
+     ----------
+     cumulative_length
+         the cumulative length array to use for exploding.
+     arrays
+         the data arrays to be exploded. Each array must have same length as
+         `cumulative_length`.
+     arrays_out
+         a list of pre-allocated arrays to hold the exploded data. The length of
+         the list should be equal to the length of `arrays`, and each entry in
+         arrays_out should have length ``cumulative_length[-1]``. If not
+         provided, output arrays are allocated for the user.
+
+     Returns
+     -------
+     arrays_out
+         the list of exploded data arrays.
+     """
+     cumulative_length = np.asarray(cumulative_length)
+     for ii in range(len(arrays)):
+         arrays[ii] = np.asarray(arrays[ii])
+     out_len = cumulative_length[-1] if len(cumulative_length) > 0 else 0
+     if arrays_out is None:
+         arrays_out = []
+         for array in arrays:
+             arrays_out.append(np.empty(out_len, dtype=array.dtype))
+     for ii in range(len(arrays)):
+         explode(cumulative_length, arrays[ii], arrays_out[ii])
+     return arrays_out
+
+
+ def _ak_is_jagged(type_: ak.types.Type) -> bool:
+     """Returns ``True`` if :class:`ak.Array` is jagged at all axes.
+
+     This assures that :func:`ak.to_buffers` returns the expected data
+     structures.
+     """
+     if isinstance(type_, ak.Array):
+         return _ak_is_jagged(type_.type)
+
+     if isinstance(type_, (ak.types.ArrayType, ak.types.ListType)):
+         return _ak_is_jagged(type_.content)
+
+     if isinstance(type_, ak.types.ScalarType):
+         msg = "Expected ArrayType or its content"
+         raise TypeError(msg)
+
+     return not isinstance(type_, ak.types.RegularType)
+
+
+ # https://github.com/scikit-hep/awkward/discussions/3049
+ def _ak_is_valid(type_: ak.types.Type) -> bool:
+     """Returns ``True`` if :class:`ak.Array` contains only elements we can serialize to LH5."""
+     if isinstance(type_, ak.Array):
+         return _ak_is_valid(type_.type)
+
+     if isinstance(type_, (ak.types.ArrayType, ak.types.ListType)):
+         return _ak_is_valid(type_.content)
+
+     if isinstance(type_, ak.types.ScalarType):
+         msg = "Expected ArrayType or its content"
+         raise TypeError(msg)
+
+     return not isinstance(
+         type_,
+         (
+             ak.types.OptionType,
+             ak.types.UnionType,
+             ak.types.RecordType,
+         ),
+     )
+
+     return isinstance(type_, ak.types.NumpyType)
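Usage sketch (editor's illustration, not part of the diff): how the new cumulative-length helpers fit together, assuming they are imported from the lgdo.types.vovutils module added above. The expected outputs in the comments follow the implementations shown in the diff.

# round-tripping the cumulative-length helpers from lgdo.types.vovutils
import numpy as np

from lgdo.types.vovutils import build_cl, explode, explode_arrays, explode_cl

# cumulative_length [2, 5, 6] describes three rows of lengths 2, 3 and 1
cumulative_length = np.array([2, 5, 6])

# explode_cl assigns to each flattened element the index of its row
row_ids = explode_cl(cumulative_length)
print(row_ids)  # [0 0 1 1 1 2]

# build_cl is the inverse on a sorted array of row indices
print(build_cl(row_ids))  # [2 5 6]

# explode broadcasts one value per row across the flattened layout
energies = np.array([10.5, 3.2, 7.7])
print(explode(cumulative_length, energies))  # [10.5 10.5 3.2 3.2 3.2 7.7]

# explode_arrays does the same for several per-row arrays at once
flat_e, flat_id = explode_arrays(cumulative_length, [energies, np.array([0, 1, 2])])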
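A second illustration, also not part of the diff, of what the private awkward-type helpers check; calling _ak_is_jagged and _ak_is_valid directly is done here only for demonstration and relies on the implementations shown above.

# what counts as "jagged" and "valid" for the LH5 serialization helpers
import awkward as ak
import numpy as np

from lgdo.types.vovutils import _ak_is_jagged, _ak_is_valid

jagged = ak.Array([[1, 2], [3]])          # ListType at every inner axis
regular = ak.from_numpy(np.ones((2, 3)))  # RegularType (rectangular) inner axis
masked = ak.Array([[1, None], [3]])       # contains an OptionType

print(_ak_is_jagged(jagged))   # True:  ak.to_buffers yields the expected offsets + flat data
print(_ak_is_jagged(regular))  # False: the inner axis is rectangular, not jagged
print(_ak_is_valid(masked))    # False: option/union/record types are rejected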
lgdo/types/waveformtable.py CHANGED
@@ -3,6 +3,7 @@ Implements a LEGEND Data Object representing a special
  :class:`~.lgdo.table.Table` to store blocks of one-dimensional time-series
  data.
  """
+
  from __future__ import annotations
 
  import logging
lgdo/utils.py CHANGED
@@ -1,4 +1,5 @@
  """Implements utilities for LEGEND Data Objects."""
+
  from __future__ import annotations
 
  import logging
@@ -8,8 +9,6 @@ from typing import Any
 
  import numpy as np
 
- from . import types as lgdo
-
  log = logging.getLogger(__name__)
 
 
@@ -56,36 +55,6 @@ def get_element_type(obj: object) -> str:
      raise ValueError(msg, type(obj).__name__)
 
 
- def copy(obj: lgdo.LGDO, dtype: np.dtype = None) -> lgdo.LGDO:
-     """Return a copy of an LGDO.
-
-     Parameters
-     ----------
-     obj
-         the LGDO to be copied.
-     dtype
-         NumPy dtype to be used for the copied object.
-
-     """
-     if dtype is None:
-         dtype = obj.dtype
-
-     if isinstance(obj, lgdo.Array):
-         return lgdo.Array(
-             np.array(obj.nda, dtype=dtype, copy=True), attrs=dict(obj.attrs)
-         )
-
-     if isinstance(obj, lgdo.VectorOfVectors):
-         return lgdo.VectorOfVectors(
-             flattened_data=copy(obj.flattened_data, dtype=dtype),
-             cumulative_length=copy(obj.cumulative_length),
-             attrs=dict(obj.attrs),
-         )
-
-     msg = f"copy of {type(obj)} not supported"
-     raise ValueError(msg)
-
-
  def getenv_bool(name: str, default: bool = False) -> bool:
      """Get environment value as a boolean, returning True for 1, t and true
      (caps-insensitive), and False for any other value and default if undefined.
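Editor's note: since lgdo.utils.copy is removed in this release, here is a minimal user-side sketch of an equivalent deep copy for the Array case, modeled on the deleted implementation above. copy_array is a hypothetical helper name, not something the package provides.

# user-side replacement for the removed lgdo.utils.copy (Array case only)
import numpy as np

from lgdo.types import Array


def copy_array(obj: Array, dtype=None) -> Array:
    """Deep-copy an Array, optionally casting to `dtype`, mirroring the removed helper."""
    if dtype is None:
        dtype = obj.dtype
    return Array(np.array(obj.nda, dtype=dtype, copy=True), attrs=dict(obj.attrs))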
legend_pydataobj-1.5.1.dist-info/RECORD DELETED
@@ -1,36 +0,0 @@
- lgdo/__init__.py,sha256=qPZZxzGMSt0Y5609kcwRiCW9qswCUIhRnGhIUHlH3uU,2913
- lgdo/_version.py,sha256=W6YuN1JOd6M-rSt9HDXK91AutRDYXTjJT_LQg3rCsjk,411
- lgdo/cli.py,sha256=5H-8LoVq-_Q7ufelDno0Sd9TsfOyJPs5O_BAGs4sG8k,1638
- lgdo/lgdo_utils.py,sha256=6a2YWEwpyEMXlAyTHZMO01aqxy6SxJzPZkGNWKNWuS0,2567
- lgdo/lh5_store.py,sha256=56TbTMfVdvb3yewZIGW2kZAdhdSLndDjxHMXJKpQuZI,8585
- lgdo/logging.py,sha256=nPNxXg553r1ItI9IS1M-PE8kGvi_tGI-Uoq8GK000Rw,1002
- lgdo/units.py,sha256=nbJ0JTNqlhHUXiBXT3k6qhRpSfMk5_9yW7EeC0dhMuQ,151
- lgdo/utils.py,sha256=N42E50vTXq8qZ0sqJCQq3tr3PCq97Ugb4zgYQCk_uLg,4457
- lgdo/compression/__init__.py,sha256=MaJ0G0cuXn2AVx1a9chIIH2F5cQG40J7aRa-xrPw5SI,1126
- lgdo/compression/base.py,sha256=82cQJujfvoAOKBFx761dEcx_xM02TBCBBuBo6i78tuI,838
- lgdo/compression/generic.py,sha256=tF3UhLJbUDcovLxpIzgQRxFSjZ5Fz3uDRy9kI4mFntQ,2515
- lgdo/compression/radware.py,sha256=VbKAvi18h48Fz-ZxMEg64yD1ezaw1NkMZazxurdyMmc,24015
- lgdo/compression/utils.py,sha256=W2RkBrxPpXlat84dnU9Ad7d_tTws0irtGl7O1dNWjnk,1140
- lgdo/compression/varlen.py,sha256=6ZZUItyoOfygDdE0DyoISeFZfqdbH6xl7T0eclfarzg,15127
- lgdo/lh5/__init__.py,sha256=8ouXSwosLsYc4T-KfKzN5TIe27RKvAxG0tZQ6cMdwFY,616
- lgdo/lh5/iterator.py,sha256=x6hJ85xWmAXkDrqVALQwJxYsuphxM6CrobBU3v4Ikmo,12369
- lgdo/lh5/store.py,sha256=7gTP4m9kb-Nhk9uaoOR-kF440YfOvzeSF4qQkjj-eAE,55673
- lgdo/lh5/tools.py,sha256=Bk1O6m45ArlxptWgovvE7AkFvS3RIsmr67l9txxKzo0,9057
- lgdo/lh5/utils.py,sha256=rPLS3QfvTfjrY4ApvSE720AgzaPi7NuDzOIbe13ZXrA,3658
- lgdo/types/__init__.py,sha256=WjMO-sEqxBlvpeACRiq78VufmvAXWrI9zgR2H0mhJY8,770
- lgdo/types/array.py,sha256=pHlkxhPlyigKa__ai7bYatVaGJszfvd90Y_en6H-A48,6536
- lgdo/types/arrayofequalsizedarrays.py,sha256=FP6z4_QaJGCcQ5fc5yKolOvp5E7GM7RydV9eOCD8Nxs,4948
- lgdo/types/encoded.py,sha256=KTs0NzZo6LEZyIUdxxYAm7IKlZR10ln-65yjilcxaLw,15304
- lgdo/types/fixedsizearray.py,sha256=6Pvkp3OC6bAF37id9p1vy_NnYPRwsuc22UAtrgs4qlU,1524
- lgdo/types/lgdo.py,sha256=UnJDi1emQYVgH_H29Vipfs4LelPopxG5pgZUu1eKOlw,2761
- lgdo/types/scalar.py,sha256=c5Es2vyDqyWTPV6mujzfIzMpC1jNWkEIcvYyWQUxH3Q,1933
- lgdo/types/struct.py,sha256=qAIzxrypcIOlWAfNb4K2wDguI9moVXfBMprkAaoMCGY,3978
- lgdo/types/table.py,sha256=5MWLLOCgASwK9X2cVaHAszbdBdNvUZGX0d4x38mFb6U,15538
- lgdo/types/vectorofvectors.py,sha256=0ukvEhU_AaiVIDBiiYBeFNlScCZSQWttjKVwk8L9wok,28228
- lgdo/types/waveformtable.py,sha256=pXoXpy8uZqGPonFjD-VTNBFc5lMMKBrw2JgYsjhk8bc,9900
- legend_pydataobj-1.5.1.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
- legend_pydataobj-1.5.1.dist-info/METADATA,sha256=QnKaqJxYPtNOEQKfiJRpykNMpMdmYKhYrLnQQ6ZV_4k,44353
- legend_pydataobj-1.5.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
- legend_pydataobj-1.5.1.dist-info/entry_points.txt,sha256=j22HoS-1cVhTtKJkDnKB49uNH0nEVER2Tpw-lVh1aws,41
- legend_pydataobj-1.5.1.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
- legend_pydataobj-1.5.1.dist-info/RECORD,,