legend-pydataobj 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- legend_pydataobj-1.0.0.dist-info/LICENSE +674 -0
- legend_pydataobj-1.0.0.dist-info/METADATA +63 -0
- legend_pydataobj-1.0.0.dist-info/RECORD +26 -0
- legend_pydataobj-1.0.0.dist-info/WHEEL +5 -0
- legend_pydataobj-1.0.0.dist-info/top_level.txt +1 -0
- lgdo/__init__.py +75 -0
- lgdo/_version.py +4 -0
- lgdo/compression/__init__.py +36 -0
- lgdo/compression/base.py +29 -0
- lgdo/compression/generic.py +77 -0
- lgdo/compression/radware.py +579 -0
- lgdo/compression/utils.py +34 -0
- lgdo/compression/varlen.py +449 -0
- lgdo/lgdo_utils.py +196 -0
- lgdo/lh5_store.py +1711 -0
- lgdo/types/__init__.py +30 -0
- lgdo/types/array.py +140 -0
- lgdo/types/arrayofequalsizedarrays.py +133 -0
- lgdo/types/encoded.py +390 -0
- lgdo/types/fixedsizearray.py +43 -0
- lgdo/types/lgdo.py +51 -0
- lgdo/types/scalar.py +59 -0
- lgdo/types/struct.py +108 -0
- lgdo/types/table.py +349 -0
- lgdo/types/vectorofvectors.py +627 -0
- lgdo/types/waveform_table.py +264 -0
@@ -0,0 +1,63 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: legend-pydataobj
|
3
|
+
Version: 1.0.0
|
4
|
+
Summary: LEGEND Python Data Objects
|
5
|
+
Home-page: https://github.com/legend-exp/legend-pydataobj
|
6
|
+
Author: The LEGEND Collaboration
|
7
|
+
Maintainer: The LEGEND Collaboration
|
8
|
+
License: GPL-3.0
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
10
|
+
Classifier: Intended Audience :: Developers
|
11
|
+
Classifier: Intended Audience :: Information Technology
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
13
|
+
Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
|
14
|
+
Classifier: Operating System :: MacOS
|
15
|
+
Classifier: Operating System :: POSIX
|
16
|
+
Classifier: Operating System :: Unix
|
17
|
+
Classifier: Programming Language :: Python
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
19
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
20
|
+
Classifier: Topic :: Scientific/Engineering
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
22
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
23
|
+
Classifier: Topic :: Scientific/Engineering :: Physics
|
24
|
+
Classifier: Topic :: Software Development
|
25
|
+
Requires-Python: >=3.9
|
26
|
+
Description-Content-Type: text/markdown
|
27
|
+
License-File: LICENSE
|
28
|
+
Requires-Dist: h5py (>=3.2.0)
|
29
|
+
Requires-Dist: hdf5plugin
|
30
|
+
Requires-Dist: numba (!=0.53.*,!=0.54.*)
|
31
|
+
Requires-Dist: numexpr
|
32
|
+
Requires-Dist: numpy (>=1.21)
|
33
|
+
Requires-Dist: pandas (>=1.4.4)
|
34
|
+
Requires-Dist: parse
|
35
|
+
Requires-Dist: pint
|
36
|
+
Provides-Extra: all
|
37
|
+
Requires-Dist: legend-pydataobj[docs,test] ; extra == 'all'
|
38
|
+
Provides-Extra: docs
|
39
|
+
Requires-Dist: furo ; extra == 'docs'
|
40
|
+
Requires-Dist: jupyter ; extra == 'docs'
|
41
|
+
Requires-Dist: myst-parser ; extra == 'docs'
|
42
|
+
Requires-Dist: nbsphinx ; extra == 'docs'
|
43
|
+
Requires-Dist: sphinx ; extra == 'docs'
|
44
|
+
Requires-Dist: sphinx-copybutton ; extra == 'docs'
|
45
|
+
Requires-Dist: sphinx-inline-tabs ; extra == 'docs'
|
46
|
+
Provides-Extra: test
|
47
|
+
Requires-Dist: pre-commit ; extra == 'test'
|
48
|
+
Requires-Dist: pylegendtestdata ; extra == 'test'
|
49
|
+
Requires-Dist: pytest (>=6.0) ; extra == 'test'
|
50
|
+
Requires-Dist: pytest-cov ; extra == 'test'
|
51
|
+
|
52
|
+
# legend-pydataobj
|
53
|
+
|
54
|
+
[](https://pypi.org/project/legend-pydataobj/)
|
55
|
+

|
56
|
+
[](https://github.com/legend-exp/legend-pydataobj/actions)
|
57
|
+
[](https://github.com/pre-commit/pre-commit)
|
58
|
+
[](https://github.com/psf/black)
|
59
|
+
[](https://app.codecov.io/gh/legend-exp/legend-pydataobj)
|
60
|
+

|
61
|
+

|
62
|
+

|
63
|
+
[](https://legend-pydataobj.readthedocs.io)
|
@@ -0,0 +1,26 @@
|
|
1
|
+
lgdo/__init__.py,sha256=mw2Xnk3eLo9W-jpOIkh8Xi9x_r4piK_ZC7r-H0OzDyE,2826
|
2
|
+
lgdo/_version.py,sha256=FrtOfsGtY3HPkXwopVTLnBRPrKexJ-VIfXoRzWX47O4,160
|
3
|
+
lgdo/lgdo_utils.py,sha256=i6zG4r5wnv2WHXfWv232C_ZxaET556Rft8315QnTTiQ,5596
|
4
|
+
lgdo/lh5_store.py,sha256=ozql8fFtfL5qpOte3B_i9LscDOKLz75b9JHasPYU6qQ,68252
|
5
|
+
lgdo/compression/__init__.py,sha256=oT9OXiDDxC7BZciWrQVfHZNkOxXfj4p8EpF2tF04w84,1091
|
6
|
+
lgdo/compression/base.py,sha256=82cQJujfvoAOKBFx761dEcx_xM02TBCBBuBo6i78tuI,838
|
7
|
+
lgdo/compression/generic.py,sha256=zWoZqR5H4x-xpbvBYKDZ0rnNqUh9pC9aZeoc8Z98OjI,2252
|
8
|
+
lgdo/compression/radware.py,sha256=ZfdebK_N1r1t2YVqqBu5B6RN87OoN9_deeiR97mP0Zo,19312
|
9
|
+
lgdo/compression/utils.py,sha256=YhxfJ_zvDu9I1BnjOsALR7SYbNohZNdm8HZ8IqTjd6w,1060
|
10
|
+
lgdo/compression/varlen.py,sha256=SJtfRfP5T_YM4YsyuAXXZgsRtSps2_ERBE985_Eli3w,14497
|
11
|
+
lgdo/types/__init__.py,sha256=5V2V_e8zLb7wmOGwnkSeuQBN_0yPq6zFyo06gBHa3_c,736
|
12
|
+
lgdo/types/array.py,sha256=Z0RyxW3iXCKlnppN97hLde6Yoj5OFL_teOg5IhCmz68,4489
|
13
|
+
lgdo/types/arrayofequalsizedarrays.py,sha256=vq8kjt7ckztX65duRkcSZTaFxz8AtdIRIvs4D9wmcCU,4492
|
14
|
+
lgdo/types/encoded.py,sha256=M1Z4dKZ4Dq5VkB03-jYmK_jDEfrVi8O0u2w4lZaWHA0,11302
|
15
|
+
lgdo/types/fixedsizearray.py,sha256=zDhj9GXcwPBcnwhlh3RTrJIdATJVZ-5If39PUWBy9mw,1250
|
16
|
+
lgdo/types/lgdo.py,sha256=COil-sFRsUWe9jXN5PdOVF3w0h6KNw-CVnzFW91kYXM,1515
|
17
|
+
lgdo/types/scalar.py,sha256=UnSfVqXOJyAMwInccWXFWpXKRyjMlpfgbMTTGJm1EWE,1651
|
18
|
+
lgdo/types/struct.py,sha256=UxV0wnCHoQM5rSmzEC9EIKWYV6drHVyK5Ab7UQztuj4,2984
|
19
|
+
lgdo/types/table.py,sha256=h5E4Keu-a3l25WXf7m_hm2Yr4RRzGjS_WCQ_Pla7SAI,12531
|
20
|
+
lgdo/types/vectorofvectors.py,sha256=1oxKJDX8VVWpmvUUDHHEzEYw0RRWJrMjOB-jHRY12N4,21859
|
21
|
+
lgdo/types/waveform_table.py,sha256=52vqjGudX5_ZR1-b087jx3vuTxJ_yEPO-dO8Dpi0ceg,9407
|
22
|
+
legend_pydataobj-1.0.0.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
23
|
+
legend_pydataobj-1.0.0.dist-info/METADATA,sha256=75XcT2_CMrBJoZCYOu3gohPrpvPl6wwmhtLmcA2uYI4,3219
|
24
|
+
legend_pydataobj-1.0.0.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
25
|
+
legend_pydataobj-1.0.0.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
|
26
|
+
legend_pydataobj-1.0.0.dist-info/RECORD,,
|
@@ -0,0 +1 @@
|
|
1
|
+
lgdo
|
lgdo/__init__.py
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
"""
|
2
|
+
LEGEND Data Objects (LGDO) are defined in the `LEGEND data format specification
|
3
|
+
<https://github.com/legend-exp/legend-data-format-specs>`_. This package
|
4
|
+
serves as the Python implementation of that specification. The general strategy
|
5
|
+
for the implementation is to dress standard Python and NumPy objects with an
|
6
|
+
``attr`` dictionary holding LGDO metadata, plus some convenience functions. The
|
7
|
+
basic data object classes are:
|
8
|
+
|
9
|
+
* :class:`.LGDO`: abstract base class for all LGDOs
|
10
|
+
* :class:`.Scalar`: typed Python scalar. Access data via the :attr:`value`
|
11
|
+
attribute
|
12
|
+
* :class:`.Array`: basic :class:`numpy.ndarray`. Access data via the
|
13
|
+
:attr:`nda` attribute.
|
14
|
+
* :class:`.FixedSizeArray`: basic :class:`numpy.ndarray`. Access data via the
|
15
|
+
:attr:`nda` attribute.
|
16
|
+
* :class:`.ArrayOfEqualSizedArrays`: multi-dimensional :class:`numpy.ndarray`.
|
17
|
+
Access data via the :attr:`nda` attribute.
|
18
|
+
* :class:`.VectorOfVectors`: a variable length array of variable length arrays.
|
19
|
+
Implemented as a pair of :class:`.Array`: :attr:`flattened_data` holding the
|
20
|
+
raw data, and :attr:`cumulative_length` whose ith element is the sum of the
|
21
|
+
lengths of the vectors with ``index <= i``
|
22
|
+
* :class:`.VectorOfEncodedVectors`: an array of variable length *encoded*
|
23
|
+
arrays. Implemented as a :class:`.VectorOfVectors` :attr:`encoded_data`
|
24
|
+
holding the encoded vectors and an :class:`.Array` :attr:`decoded_size`
|
25
|
+
specifying the size of each decoded vector. Mainly used to represent a list
|
26
|
+
of compressed waveforms.
|
27
|
+
* :class:`.ArrayOfEncodedEqualSizedArrays`: an array of equal sized encoded
|
28
|
+
arrays. Similar to :class:`.VectorOfEncodedVectors` except for
|
29
|
+
:attr:`decoded_size`, which is now a scalar.
|
30
|
+
* :class:`.Struct`: a dictionary containing LGDO objects. Derives from
|
31
|
+
:class:`dict`
|
32
|
+
* :class:`.Table`: a :class:`.Struct` whose elements ("columns") are all array
|
33
|
+
types with the same length (number of rows)
|
34
|
+
|
35
|
+
Currently the primary on-disk format for LGDO objects is LEGEND HDF5 (LH5) files. IO
|
36
|
+
is done via the class :class:`.lh5_store.LH5Store`. LH5 files can also be
|
37
|
+
browsed easily in python like any `HDF5 <https://www.hdfgroup.org>`_ file using
|
38
|
+
`h5py <https://www.h5py.org>`_.
|
39
|
+
"""
|
40
|
+
|
41
|
+
from .lh5_store import LH5Iterator, LH5Store, load_dfs, load_nda, ls, show
|
42
|
+
from .types import (
|
43
|
+
LGDO,
|
44
|
+
Array,
|
45
|
+
ArrayOfEncodedEqualSizedArrays,
|
46
|
+
ArrayOfEqualSizedArrays,
|
47
|
+
FixedSizeArray,
|
48
|
+
Scalar,
|
49
|
+
Struct,
|
50
|
+
Table,
|
51
|
+
VectorOfEncodedVectors,
|
52
|
+
VectorOfVectors,
|
53
|
+
WaveformTable,
|
54
|
+
)
|
55
|
+
|
56
|
+
# Public API of the package. Every name listed here must be bound in this
# module, otherwise ``from lgdo import *`` fails with AttributeError. A
# "copy" entry used to be listed without any matching import or definition
# in this module, so it has been dropped.
__all__ = [
    "Array",
    "ArrayOfEqualSizedArrays",
    "ArrayOfEncodedEqualSizedArrays",
    "FixedSizeArray",
    "LGDO",
    "Scalar",
    "Struct",
    "Table",
    "VectorOfVectors",
    "VectorOfEncodedVectors",
    "WaveformTable",
    "LH5Iterator",
    "LH5Store",
    "load_dfs",
    "load_nda",
    "ls",
    "show",
]
|
lgdo/_version.py
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
r"""Data compression utilities.
|
2
|
+
|
3
|
+
This subpackage collects all LEGEND custom data compression (encoding) and
|
4
|
+
decompression (decoding) algorithms.
|
5
|
+
|
6
|
+
Available lossless waveform compression algorithms:
|
7
|
+
|
8
|
+
* :class:`.RadwareSigcompress`, a Python port of the C algorithm
|
9
|
+
`radware-sigcompress` by D. Radford.
|
10
|
+
* :class:`.ULEB128ZigZagDiff` variable-length base-128 encoding of waveform
|
11
|
+
differences.
|
12
|
+
|
13
|
+
All waveform compression algorithms inherit from the :class:`.WaveformCodec`
|
14
|
+
abstract class.
|
15
|
+
|
16
|
+
:func:`~.generic.encode` and :func:`~.generic.decode` provide a high-level
|
17
|
+
interface for encoding/decoding :class:`~.lgdo.LGDO`\ s.
|
18
|
+
|
19
|
+
>>> from lgdo import WaveformTable, compression
|
20
|
+
>>> wftbl = WaveformTable(...)
|
21
|
+
>>> enc_wft = compression.encode(wftbl, compression.RadwareSigcompress(codec_shift=-23768))
|
22
|
+
>>> compression.decode(enc_wft) # == wftbl
|
23
|
+
"""
|
24
|
+
|
25
|
+
from .base import WaveformCodec
|
26
|
+
from .generic import decode, encode
|
27
|
+
from .radware import RadwareSigcompress
|
28
|
+
from .varlen import ULEB128ZigZagDiff
|
29
|
+
|
30
|
+
__all__ = [
|
31
|
+
"WaveformCodec",
|
32
|
+
"encode",
|
33
|
+
"decode",
|
34
|
+
"RadwareSigcompress",
|
35
|
+
"ULEB128ZigZagDiff",
|
36
|
+
]
|
lgdo/compression/base.py
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import re
|
4
|
+
from dataclasses import asdict, dataclass
|
5
|
+
|
6
|
+
|
7
|
+
@dataclass(frozen=True)
class WaveformCodec:
    """Base class identifying a waveform compression algorithm.

    The :attr:`codec` property yields a string identifier, derived from the
    class name, suitable for labeling encoded data on disk. Since it depends
    only on the class, it is the same for every instance of a given class.

    Note
    ----
    This is an abstract type. The user must provide a concrete subclass.
    """

    @property
    def codec(self):
        """The waveform codec string identifier.

        Will be attached as an attribute to the encoded waveform values.
        """
        class_name = type(self).__name__
        # Insert an underscore before every capital letter that is not the
        # first character, then lower-case the result (CamelCase -> snake_case).
        with_underscores = re.sub("(?<!^)(?=[A-Z])", "_", class_name)
        return with_underscores.lower()

    def asdict(self):
        """Return the codec identifier and the dataclass fields as a dictionary.

        The ``"codec"`` key comes first; the dataclass fields follow.
        """
        return {"codec": self.codec, **asdict(self)}
|
@@ -0,0 +1,77 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import logging
|
4
|
+
|
5
|
+
from .. import types as lgdo
|
6
|
+
from . import radware, varlen
|
7
|
+
from .base import WaveformCodec
|
8
|
+
|
9
|
+
log = logging.getLogger(__name__)
|
10
|
+
|
11
|
+
|
12
|
+
def encode(
    obj: lgdo.VectorOfVectors | lgdo.ArrayOfEqualSizedArrays,
    codec: WaveformCodec | str = None,
) -> lgdo.VectorOfEncodedVectors | lgdo.ArrayOfEncodedEqualSizedArrays:
    """Encode LGDOs with `codec`.

    Defines behaviors for each implemented waveform encoding algorithm.

    Parameters
    ----------
    obj
        LGDO array type.
    codec
        algorithm to be used for encoding. Either a :class:`.WaveformCodec`
        instance or the string identifier of a supported codec; in the latter
        case the codec is instantiated with its default parameters.

    Returns
    -------
    enc_obj
        the encoded object. The codec identifier and parameters (as returned
        by :meth:`.WaveformCodec.asdict`) are merged into its attributes.

    Raises
    ------
    ValueError
        if `codec` is neither a :class:`.WaveformCodec` nor a string, or if
        it does not match any supported algorithm.
    """
    # %-style arguments: repr(obj) is only computed if the record is emitted
    log.debug("encoding %r with %s", obj, codec)

    if isinstance(codec, str):
        # A bare identifier carries no parameters: turn it into a
        # default-constructed codec object, so that the parameter lookup and
        # the asdict() call below work the same way as for codec instances.
        for codec_type in (radware.RadwareSigcompress, varlen.ULEB128ZigZagDiff):
            if _is_codec(codec, codec_type):
                codec = codec_type()
                break

    if _is_codec(codec, radware.RadwareSigcompress):
        enc_obj = radware.encode(obj, shift=codec.codec_shift)
    elif _is_codec(codec, varlen.ULEB128ZigZagDiff):
        enc_obj = varlen.encode(obj)
    else:
        raise ValueError(f"'{codec}' not supported")

    # label the encoded data with the codec name and its parameters
    enc_obj.attrs |= codec.asdict()

    return enc_obj
|
39
|
+
|
40
|
+
|
41
|
+
def decode(
    obj: lgdo.VectorOfEncodedVectors | lgdo.ArrayOfEncodedEqualSizedArrays,
) -> lgdo.VectorOfVectors | lgdo.ArrayOfEqualSizedArrays:
    """Decode encoded LGDOs.

    Defines decoding behaviors for each implemented waveform encoding
    algorithm. Expects to find the codec (and its parameters) the arrays were
    encoded with among the LGDO attributes.

    Parameters
    ----------
    obj
        LGDO array type.

    Raises
    ------
    RuntimeError
        if `obj` carries no ``codec`` attribute.
    ValueError
        if the ``codec`` attribute does not match any supported algorithm.
    """
    if "codec" not in obj.attrs:
        raise RuntimeError(
            "object does not carry any 'codec' attribute, I don't know how to decode it"
        )

    codec = obj.attrs["codec"]
    # %-style arguments: repr(obj) is only computed if the record is emitted
    log.debug("decoding %r with %s", obj, codec)

    if _is_codec(codec, radware.RadwareSigcompress):
        # a missing 'codec_shift' attribute is treated as a shift of 0
        return radware.decode(obj, shift=int(obj.attrs.get("codec_shift", 0)))

    if _is_codec(codec, varlen.ULEB128ZigZagDiff):
        return varlen.decode(obj)

    raise ValueError(f"'{codec}' not supported")
|
69
|
+
|
70
|
+
|
71
|
+
def _is_codec(ident: WaveformCodec | str, codec) -> bool:
    """Tell whether `ident` designates the codec class `codec`.

    Parameters
    ----------
    ident
        a codec object, checked with :func:`isinstance` against `codec`, or a
        string, compared with the identifier of a default-constructed `codec`.
    codec
        the codec class to test against.

    Raises
    ------
    ValueError
        if `ident` is neither a :class:`.WaveformCodec` nor a string.
    """
    if isinstance(ident, WaveformCodec):
        return isinstance(ident, codec)

    if not isinstance(ident, str):
        raise ValueError("input must be WaveformCodec object or string identifier")

    # the identifier is exposed as an instance property, so build an instance
    return ident == codec().codec
|