legend-pydataobj 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,63 @@
1
+ Metadata-Version: 2.1
2
+ Name: legend-pydataobj
3
+ Version: 1.0.0
4
+ Summary: LEGEND Python Data Objects
5
+ Home-page: https://github.com/legend-exp/legend-pydataobj
6
+ Author: The LEGEND Collaboration
7
+ Maintainer: The LEGEND Collaboration
8
+ License: GPL-3.0
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Intended Audience :: Information Technology
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
14
+ Classifier: Operating System :: MacOS
15
+ Classifier: Operating System :: POSIX
16
+ Classifier: Operating System :: Unix
17
+ Classifier: Programming Language :: Python
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3 :: Only
20
+ Classifier: Topic :: Scientific/Engineering
21
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
22
+ Classifier: Topic :: Scientific/Engineering :: Mathematics
23
+ Classifier: Topic :: Scientific/Engineering :: Physics
24
+ Classifier: Topic :: Software Development
25
+ Requires-Python: >=3.9
26
+ Description-Content-Type: text/markdown
27
+ License-File: LICENSE
28
+ Requires-Dist: h5py (>=3.2.0)
29
+ Requires-Dist: hdf5plugin
30
+ Requires-Dist: numba (!=0.53.*,!=0.54.*)
31
+ Requires-Dist: numexpr
32
+ Requires-Dist: numpy (>=1.21)
33
+ Requires-Dist: pandas (>=1.4.4)
34
+ Requires-Dist: parse
35
+ Requires-Dist: pint
36
+ Provides-Extra: all
37
+ Requires-Dist: legend-pydataobj[docs,test] ; extra == 'all'
38
+ Provides-Extra: docs
39
+ Requires-Dist: furo ; extra == 'docs'
40
+ Requires-Dist: jupyter ; extra == 'docs'
41
+ Requires-Dist: myst-parser ; extra == 'docs'
42
+ Requires-Dist: nbsphinx ; extra == 'docs'
43
+ Requires-Dist: sphinx ; extra == 'docs'
44
+ Requires-Dist: sphinx-copybutton ; extra == 'docs'
45
+ Requires-Dist: sphinx-inline-tabs ; extra == 'docs'
46
+ Provides-Extra: test
47
+ Requires-Dist: pre-commit ; extra == 'test'
48
+ Requires-Dist: pylegendtestdata ; extra == 'test'
49
+ Requires-Dist: pytest (>=6.0) ; extra == 'test'
50
+ Requires-Dist: pytest-cov ; extra == 'test'
51
+
52
+ # legend-pydataobj
53
+
54
+ [![PyPI](https://img.shields.io/pypi/v/legend-pydataobj?logo=pypi)](https://pypi.org/project/legend-pydataobj/)
55
+ ![GitHub tag (latest by date)](https://img.shields.io/github/v/tag/legend-exp/legend-pydataobj?logo=git)
56
+ [![GitHub Workflow Status](https://img.shields.io/github/checks-status/legend-exp/legend-pydataobj/main?label=main%20branch&logo=github)](https://github.com/legend-exp/legend-pydataobj/actions)
57
+ [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://github.com/pre-commit/pre-commit)
58
+ [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
59
+ [![Codecov](https://img.shields.io/codecov/c/github/legend-exp/legend-pydataobj?logo=codecov)](https://app.codecov.io/gh/legend-exp/legend-pydataobj)
60
+ ![GitHub issues](https://img.shields.io/github/issues/legend-exp/legend-pydataobj?logo=github)
61
+ ![GitHub pull requests](https://img.shields.io/github/issues-pr/legend-exp/legend-pydataobj?logo=github)
62
+ ![License](https://img.shields.io/github/license/legend-exp/legend-pydataobj)
63
+ [![Read the Docs](https://img.shields.io/readthedocs/legend-pydataobj?logo=readthedocs)](https://legend-pydataobj.readthedocs.io)
@@ -0,0 +1,26 @@
1
+ lgdo/__init__.py,sha256=mw2Xnk3eLo9W-jpOIkh8Xi9x_r4piK_ZC7r-H0OzDyE,2826
2
+ lgdo/_version.py,sha256=FrtOfsGtY3HPkXwopVTLnBRPrKexJ-VIfXoRzWX47O4,160
3
+ lgdo/lgdo_utils.py,sha256=i6zG4r5wnv2WHXfWv232C_ZxaET556Rft8315QnTTiQ,5596
4
+ lgdo/lh5_store.py,sha256=ozql8fFtfL5qpOte3B_i9LscDOKLz75b9JHasPYU6qQ,68252
5
+ lgdo/compression/__init__.py,sha256=oT9OXiDDxC7BZciWrQVfHZNkOxXfj4p8EpF2tF04w84,1091
6
+ lgdo/compression/base.py,sha256=82cQJujfvoAOKBFx761dEcx_xM02TBCBBuBo6i78tuI,838
7
+ lgdo/compression/generic.py,sha256=zWoZqR5H4x-xpbvBYKDZ0rnNqUh9pC9aZeoc8Z98OjI,2252
8
+ lgdo/compression/radware.py,sha256=ZfdebK_N1r1t2YVqqBu5B6RN87OoN9_deeiR97mP0Zo,19312
9
+ lgdo/compression/utils.py,sha256=YhxfJ_zvDu9I1BnjOsALR7SYbNohZNdm8HZ8IqTjd6w,1060
10
+ lgdo/compression/varlen.py,sha256=SJtfRfP5T_YM4YsyuAXXZgsRtSps2_ERBE985_Eli3w,14497
11
+ lgdo/types/__init__.py,sha256=5V2V_e8zLb7wmOGwnkSeuQBN_0yPq6zFyo06gBHa3_c,736
12
+ lgdo/types/array.py,sha256=Z0RyxW3iXCKlnppN97hLde6Yoj5OFL_teOg5IhCmz68,4489
13
+ lgdo/types/arrayofequalsizedarrays.py,sha256=vq8kjt7ckztX65duRkcSZTaFxz8AtdIRIvs4D9wmcCU,4492
14
+ lgdo/types/encoded.py,sha256=M1Z4dKZ4Dq5VkB03-jYmK_jDEfrVi8O0u2w4lZaWHA0,11302
15
+ lgdo/types/fixedsizearray.py,sha256=zDhj9GXcwPBcnwhlh3RTrJIdATJVZ-5If39PUWBy9mw,1250
16
+ lgdo/types/lgdo.py,sha256=COil-sFRsUWe9jXN5PdOVF3w0h6KNw-CVnzFW91kYXM,1515
17
+ lgdo/types/scalar.py,sha256=UnSfVqXOJyAMwInccWXFWpXKRyjMlpfgbMTTGJm1EWE,1651
18
+ lgdo/types/struct.py,sha256=UxV0wnCHoQM5rSmzEC9EIKWYV6drHVyK5Ab7UQztuj4,2984
19
+ lgdo/types/table.py,sha256=h5E4Keu-a3l25WXf7m_hm2Yr4RRzGjS_WCQ_Pla7SAI,12531
20
+ lgdo/types/vectorofvectors.py,sha256=1oxKJDX8VVWpmvUUDHHEzEYw0RRWJrMjOB-jHRY12N4,21859
21
+ lgdo/types/waveform_table.py,sha256=52vqjGudX5_ZR1-b087jx3vuTxJ_yEPO-dO8Dpi0ceg,9407
22
+ legend_pydataobj-1.0.0.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
23
+ legend_pydataobj-1.0.0.dist-info/METADATA,sha256=75XcT2_CMrBJoZCYOu3gohPrpvPl6wwmhtLmcA2uYI4,3219
24
+ legend_pydataobj-1.0.0.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
25
+ legend_pydataobj-1.0.0.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
26
+ legend_pydataobj-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: bdist_wheel (0.40.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ lgdo
lgdo/__init__.py ADDED
@@ -0,0 +1,75 @@
1
+ """
2
+ LEGEND Data Objects (LGDO) are defined in the `LEGEND data format specification
3
+ <https://github.com/legend-exp/legend-data-format-specs>`_. This package
4
+ serves as the Python implementation of that specification. The general strategy
5
+ for the implementation is to dress standard Python and NumPy objects with an
6
+ ``attr`` dictionary holding LGDO metadata, plus some convenience functions. The
7
+ basic data object classes are:
8
+
9
+ * :class:`.LGDO`: abstract base class for all LGDOs
10
+ * :class:`.Scalar`: typed Python scalar. Access data via the :attr:`value`
11
+ attribute
12
+ * :class:`.Array`: basic :class:`numpy.ndarray`. Access data via the
13
+ :attr:`nda` attribute.
14
+ * :class:`.FixedSizeArray`: basic :class:`numpy.ndarray`. Access data via the
15
+ :attr:`nda` attribute.
16
+ * :class:`.ArrayOfEqualSizedArrays`: multi-dimensional :class:`numpy.ndarray`.
17
+ Access data via the :attr:`nda` attribute.
18
+ * :class:`.VectorOfVectors`: a variable length array of variable length arrays.
19
+ Implemented as a pair of :class:`.Array`: :attr:`flattened_data` holding the
20
+ raw data, and :attr:`cumulative_length` whose ith element is the sum of the
21
+ lengths of the vectors with ``index <= i``
22
+ * :class:`.VectorOfEncodedVectors`: an array of variable length *encoded*
23
+ arrays. Implemented as a :class:`.VectorOfVectors` :attr:`encoded_data`
24
+ holding the encoded vectors and an :class:`.Array` :attr:`decoded_size`
25
+ specifying the size of each decoded vector. Mainly used to represent a list
26
+ of compressed waveforms.
27
+ * :class:`.ArrayOfEncodedEqualSizedArrays`: an array of equal sized encoded
28
+ arrays. Similar to :class:`.VectorOfEncodedVectors` except for
29
+ :attr:`decoded_size`, which is now a scalar.
30
+ * :class:`.Struct`: a dictionary containing LGDO objects. Derives from
31
+ :class:`dict`
32
+ * :class:`.Table`: a :class:`.Struct` whose elements ("columns") are all array
33
+ types with the same length (number of rows)
34
+
35
+ Currently the primary on-disk format for LGDO objects is LEGEND HDF5 (LH5) files. IO
36
+ is done via the class :class:`.lh5_store.LH5Store`. LH5 files can also be
37
+ browsed easily in python like any `HDF5 <https://www.hdfgroup.org>`_ file using
38
+ `h5py <https://www.h5py.org>`_.
39
+ """
40
+
41
+ from .lh5_store import LH5Iterator, LH5Store, load_dfs, load_nda, ls, show
42
+ from .types import (
43
+ LGDO,
44
+ Array,
45
+ ArrayOfEncodedEqualSizedArrays,
46
+ ArrayOfEqualSizedArrays,
47
+ FixedSizeArray,
48
+ Scalar,
49
+ Struct,
50
+ Table,
51
+ VectorOfEncodedVectors,
52
+ VectorOfVectors,
53
+ WaveformTable,
54
+ )
55
+
56
# Public API of the package. Every entry must be a name actually bound in this
# module, otherwise ``from lgdo import *`` raises AttributeError. The former
# "copy" entry was dropped because nothing called ``copy`` is imported or
# defined here.
__all__ = [
    "Array",
    "ArrayOfEqualSizedArrays",
    "ArrayOfEncodedEqualSizedArrays",
    "FixedSizeArray",
    "LGDO",
    "Scalar",
    "Struct",
    "Table",
    "VectorOfVectors",
    "VectorOfEncodedVectors",
    "WaveformTable",
    "LH5Iterator",
    "LH5Store",
    "load_dfs",
    "load_nda",
    "ls",
    "show",
]
lgdo/_version.py ADDED
@@ -0,0 +1,4 @@
1
+ # file generated by setuptools_scm
2
+ # don't change, don't track in version control
3
+ __version__ = version = '1.0.0'
4
+ __version_tuple__ = version_tuple = (1, 0, 0)
@@ -0,0 +1,36 @@
1
+ r"""Data compression utilities.
2
+
3
+ This subpackage collects all LEGEND custom data compression (encoding) and
4
+ decompression (decoding) algorithms.
5
+
6
+ Available lossless waveform compression algorithms:
7
+
8
+ * :class:`.RadwareSigcompress`, a Python port of the C algorithm
9
+ `radware-sigcompress` by D. Radford.
10
+ * :class:`.ULEB128ZigZagDiff` variable-length base-128 encoding of waveform
11
+ differences.
12
+
13
+ All waveform compression algorithms inherit from the :class:`.WaveformCodec`
14
+ abstract class.
15
+
16
+ :func:`~.generic.encode` and :func:`~.generic.decode` provide a high-level
17
+ interface for encoding/decoding :class:`~.lgdo.LGDO`\ s.
18
+
19
+ >>> from lgdo import WaveformTable, compression
20
+ >>> wftbl = WaveformTable(...)
21
+ >>> enc_wft = compression.encode(wftbl, compression.RadwareSigcompress(codec_shift=-23768))
22
+ >>> compression.decode(enc_wft) # == wftbl
23
+ """
24
+
25
+ from .base import WaveformCodec
26
+ from .generic import decode, encode
27
+ from .radware import RadwareSigcompress
28
+ from .varlen import ULEB128ZigZagDiff
29
+
30
+ __all__ = [
31
+ "WaveformCodec",
32
+ "encode",
33
+ "decode",
34
+ "RadwareSigcompress",
35
+ "ULEB128ZigZagDiff",
36
+ ]
@@ -0,0 +1,29 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from dataclasses import asdict, dataclass
5
+
6
+
7
@dataclass(frozen=True)
class WaveformCodec:
    """Base class identifying a waveform compression algorithm.

    The :attr:`codec` property yields a string identifier suitable for
    labeling encoded data on disk. It is derived from the class name, hence
    it is the same for every instance of a given class.

    Note
    ----
    This is meant as an abstract type. The user must provide a concrete
    subclass.
    """

    @property
    def codec(self):
        """The waveform codec string identifier.

        Computed by converting the class name to lowercase, with an
        underscore inserted before every uppercase letter except the first
        character. Will be attached as an attribute to the encoded Waveform
        values.
        """
        class_name = type(self).__name__
        # zero-width match before each capital letter that is not at the
        # very start of the name
        with_underscores = re.sub("(?<!^)(?=[A-Z])", "_", class_name)
        return with_underscores.lower()

    def asdict(self):
        """Return the codec identifier and the dataclass fields as dictionary.

        The ``"codec"`` key comes first; dataclass fields are merged on top,
        so a field that is itself named ``codec`` takes precedence over the
        property value.
        """
        return {"codec": self.codec, **asdict(self)}
@@ -0,0 +1,77 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+
5
+ from .. import types as lgdo
6
+ from . import radware, varlen
7
+ from .base import WaveformCodec
8
+
9
+ log = logging.getLogger(__name__)
10
+
11
+
12
def encode(
    obj: lgdo.VectorOfVectors | lgdo.ArrayOfEqualSizedArrays,
    codec: WaveformCodec | str = None,
) -> lgdo.VectorOfEncodedVectors | lgdo.ArrayOfEncodedEqualSizedArrays:
    """Encode LGDOs with `codec`.

    Defines behaviors for each implemented waveform encoding algorithm.

    Parameters
    ----------
    obj
        LGDO array type.
    codec
        algorithm to be used for encoding. Either a :class:`.WaveformCodec`
        instance or its string identifier; in the latter case the codec is
        instantiated with its default parameters.

    Returns
    -------
    enc_obj
        the encoded object, with the codec identifier and parameters (as
        returned by :meth:`.WaveformCodec.asdict`) merged into its
        attributes.

    Raises
    ------
    ValueError
        if `codec` is neither a :class:`.WaveformCodec` nor a string (this
        includes the default ``None``), or if it does not match any
        supported algorithm.
    """
    log.debug(f"encoding {repr(obj)} with {codec}")

    if _is_codec(codec, radware.RadwareSigcompress):
        # a bare string identifier carries no parameters: fall back to a
        # default-constructed codec so that .codec_shift/.asdict() exist
        if isinstance(codec, str):
            codec = radware.RadwareSigcompress()
        enc_obj = radware.encode(obj, shift=codec.codec_shift)
    elif _is_codec(codec, varlen.ULEB128ZigZagDiff):
        if isinstance(codec, str):
            codec = varlen.ULEB128ZigZagDiff()
        enc_obj = varlen.encode(obj)
    else:
        raise ValueError(f"'{codec}' not supported")

    # label the encoded data so that decode() can find out how to undo it
    enc_obj.attrs |= codec.asdict()

    return enc_obj
39
+
40
+
41
def decode(
    obj: lgdo.VectorOfEncodedVectors | lgdo.ArrayOfEncodedEqualSizedArrays,
) -> lgdo.VectorOfVectors | lgdo.ArrayOfEqualSizedArrays:
    """Decode encoded LGDOs.

    Defines decoding behaviors for each implemented waveform encoding
    algorithm. Expects to find the codec (and its parameters) the arrays were
    encoded with among the LGDO attributes.

    Parameters
    ----------
    obj
        LGDO array type.

    Raises
    ------
    RuntimeError
        if `obj` has no ``codec`` attribute.
    ValueError
        if the ``codec`` attribute is neither a :class:`.WaveformCodec` nor
        a string, or if it does not match any supported algorithm.
    """
    if "codec" not in obj.attrs:
        raise RuntimeError(
            "object does not carry any 'codec' attribute, I don't know how to decode it"
        )

    codec = obj.attrs["codec"]
    log.debug(f"decoding {repr(obj)} with {codec}")

    if _is_codec(codec, radware.RadwareSigcompress):
        # the shift is read back from the attributes; 0 if it was not stored
        return radware.decode(obj, shift=int(obj.attrs.get("codec_shift", 0)))
    elif _is_codec(codec, varlen.ULEB128ZigZagDiff):
        return varlen.decode(obj)
    else:
        raise ValueError(f"'{codec}' not supported")
69
+
70
+
71
def _is_codec(ident: WaveformCodec | str, codec) -> bool:
    """Tell whether `ident` designates the codec class `codec`.

    Parameters
    ----------
    ident
        a codec instance, checked with :func:`isinstance` against `codec`,
        or a string, compared with the ``codec`` identifier of a
        default-constructed `codec` instance.
    codec
        the codec class to test against.

    Raises
    ------
    ValueError
        if `ident` is neither a :class:`.WaveformCodec` nor a string.
    """
    if isinstance(ident, WaveformCodec):
        return isinstance(ident, codec)

    if isinstance(ident, str):
        # instantiate with default parameters just to read the identifier
        return ident == codec().codec

    raise ValueError("input must be WaveformCodec object or string identifier")