legend-pydataobj 1.8.1.tar.gz → 1.10.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/PKG-INFO +3 -2
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/pyproject.toml +2 -1
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/PKG-INFO +3 -2
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/SOURCES.txt +2 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/requires.txt +2 -1
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/__init__.py +4 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/_version.py +2 -2
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/__init__.py +2 -0
- legend_pydataobj-1.10.0/src/lgdo/lh5/_serializers/read/array.py +34 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/composite.py +122 -70
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/encoded.py +31 -9
- legend_pydataobj-1.10.0/src/lgdo/lh5/_serializers/read/ndarray.py +117 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/scalar.py +10 -3
- legend_pydataobj-1.10.0/src/lgdo/lh5/_serializers/read/utils.py +35 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/vector_of_vectors.py +35 -13
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/write/array.py +6 -1
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/write/composite.py +20 -4
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/write/scalar.py +6 -1
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/core.py +78 -7
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/datatype.py +1 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/exceptions.py +3 -3
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/store.py +101 -11
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/tools.py +1 -1
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/utils.py +13 -2
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5_store.py +1 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/types/__init__.py +2 -0
- legend_pydataobj-1.10.0/src/lgdo/types/histogram.py +419 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/types/table.py +1 -1
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/conftest.py +1 -1
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/lh5/test_core.py +6 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/lh5/test_lh5_datatype.py +1 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/lh5/test_lh5_write.py +133 -0
- legend_pydataobj-1.10.0/tests/types/test_histogram.py +298 -0
- legend_pydataobj-1.8.1/src/lgdo/lh5/_serializers/read/array.py +0 -34
- legend_pydataobj-1.8.1/src/lgdo/lh5/_serializers/read/ndarray.py +0 -103
- legend_pydataobj-1.8.1/src/lgdo/lh5/_serializers/read/utils.py +0 -12
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/LICENSE +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/README.md +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/setup.cfg +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/dependency_links.txt +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/entry_points.txt +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/not-zip-safe +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/top_level.txt +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/cli.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/compression/__init__.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/compression/base.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/compression/generic.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/compression/radware.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/compression/utils.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/compression/varlen.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lgdo_utils.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/__init__.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/__init__.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/write/__init__.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/write/vector_of_vectors.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/iterator.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/logging.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/types/array.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/types/arrayofequalsizedarrays.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/types/encoded.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/types/fixedsizearray.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/types/lgdo.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/types/scalar.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/types/struct.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/types/vectorofvectors.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/types/vovutils.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/types/waveformtable.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/units.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/utils.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/compression/conftest.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/compression/sigcompress/LDQTA_r117_20200110T105115Z_cal_geds_raw-0.dat +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/compression/sigcompress/special-wf-clipped.dat +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/compression/test_compression.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/compression/test_radware_sigcompress.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/compression/test_str2wfcodec.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/compression/test_uleb128_zigzag_diff.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/lh5/conftest.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/lh5/test_lh5_iterator.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/lh5/test_lh5_store.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/lh5/test_lh5_tools.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/lh5/test_lh5_utils.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/test_cli.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/test_lgdo_utils.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/types/test_array.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/types/test_arrayofequalsizedarrays.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/types/test_encoded.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/types/test_fixedsizearray.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/types/test_representations.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/types/test_scalar.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/types/test_struct.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/types/test_table.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/types/test_table_eval.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/types/test_vectorofvectors.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/types/test_vovutils.py +0 -0
- {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/types/test_waveformtable.py +0 -0
{legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: legend_pydataobj
-Version: 1.8.1
+Version: 1.10.0
 Summary: LEGEND Python Data Objects
 Author: The LEGEND Collaboration
 Maintainer: The LEGEND Collaboration
@@ -700,8 +700,9 @@ License-File: LICENSE
 Requires-Dist: awkward>=2
 Requires-Dist: awkward-pandas
 Requires-Dist: colorlog
-Requires-Dist: h5py>=3.
+Requires-Dist: h5py>=3.10
 Requires-Dist: hdf5plugin
+Requires-Dist: hist
 Requires-Dist: numba!=0.53.*,!=0.54.*
 Requires-Dist: numexpr
 Requires-Dist: numpy>=1.21
{legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: legend_pydataobj
-Version: 1.8.1
+Version: 1.10.0
 Summary: LEGEND Python Data Objects
 Author: The LEGEND Collaboration
 Maintainer: The LEGEND Collaboration
@@ -700,8 +700,9 @@ License-File: LICENSE
 Requires-Dist: awkward>=2
 Requires-Dist: awkward-pandas
 Requires-Dist: colorlog
-Requires-Dist: h5py>=3.
+Requires-Dist: h5py>=3.10
 Requires-Dist: hdf5plugin
+Requires-Dist: hist
 Requires-Dist: numba!=0.53.*,!=0.54.*
 Requires-Dist: numexpr
 Requires-Dist: numpy>=1.21
{legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/SOURCES.txt
RENAMED
@@ -49,6 +49,7 @@ src/lgdo/types/array.py
 src/lgdo/types/arrayofequalsizedarrays.py
 src/lgdo/types/encoded.py
 src/lgdo/types/fixedsizearray.py
+src/lgdo/types/histogram.py
 src/lgdo/types/lgdo.py
 src/lgdo/types/scalar.py
 src/lgdo/types/struct.py
@@ -78,6 +79,7 @@ tests/types/test_array.py
 tests/types/test_arrayofequalsizedarrays.py
 tests/types/test_encoded.py
 tests/types/test_fixedsizearray.py
+tests/types/test_histogram.py
 tests/types/test_representations.py
 tests/types/test_scalar.py
 tests/types/test_struct.py
{legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/__init__.py
RENAMED
@@ -33,6 +33,8 @@ basic data object classes are:
   :class:`dict`
 * :class:`.Table`: a :class:`.Struct` whose elements ("columns") are all array
   types with the same length (number of rows)
+* :class:`.Histogram`: holds an array of histogrammed data, and the associated
+  binning of arbitrary dimensionality.
 
 Currently the primary on-disk format for LGDO object is LEGEND HDF5 (LH5) files. IO
 is done via the class :class:`.lh5_store.LH5Store`. LH5 files can also be
@@ -50,6 +52,7 @@ from .types import (
     ArrayOfEncodedEqualSizedArrays,
     ArrayOfEqualSizedArrays,
     FixedSizeArray,
+    Histogram,
     Scalar,
     Struct,
     Table,
@@ -63,6 +66,7 @@ __all__ = [
     "ArrayOfEqualSizedArrays",
     "ArrayOfEncodedEqualSizedArrays",
     "FixedSizeArray",
+    "Histogram",
     "LGDO",
     "Scalar",
     "Struct",
|
@@ -7,6 +7,7 @@ from .read.array import (
|
|
7
7
|
_h5_read_ndarray,
|
8
8
|
)
|
9
9
|
from .read.composite import (
|
10
|
+
_h5_read_histogram,
|
10
11
|
_h5_read_lgdo,
|
11
12
|
_h5_read_struct,
|
12
13
|
_h5_read_table,
|
@@ -32,6 +33,7 @@ __all__ = [
|
|
32
33
|
"_h5_read_array_of_equalsized_arrays",
|
33
34
|
"_h5_read_struct",
|
34
35
|
"_h5_read_table",
|
36
|
+
"_h5_read_histogram",
|
35
37
|
"_h5_read_scalar",
|
36
38
|
"_h5_read_array_of_encoded_equalsized_arrays",
|
37
39
|
"_h5_read_vector_of_encoded_vectors",
|
@@ -0,0 +1,34 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import logging
|
4
|
+
|
5
|
+
from ....types import Array, ArrayOfEqualSizedArrays, FixedSizeArray
|
6
|
+
from . import utils
|
7
|
+
from .ndarray import _h5_read_ndarray
|
8
|
+
|
9
|
+
log = logging.getLogger(__name__)
|
10
|
+
|
11
|
+
|
12
|
+
def _h5_read_array_generic(type_, h5d, fname, oname, **kwargs):
|
13
|
+
nda, attrs, n_rows_to_read = _h5_read_ndarray(h5d, fname, oname, **kwargs)
|
14
|
+
|
15
|
+
obj_buf = kwargs["obj_buf"]
|
16
|
+
|
17
|
+
if obj_buf is None:
|
18
|
+
return type_(nda=nda, attrs=attrs), n_rows_to_read
|
19
|
+
|
20
|
+
utils.check_obj_buf_attrs(obj_buf.attrs, attrs, fname, oname)
|
21
|
+
|
22
|
+
return obj_buf, n_rows_to_read
|
23
|
+
|
24
|
+
|
25
|
+
def _h5_read_array(h5d, fname, oname, **kwargs):
|
26
|
+
return _h5_read_array_generic(Array, h5d, fname, oname, **kwargs)
|
27
|
+
|
28
|
+
|
29
|
+
def _h5_read_fixedsize_array(h5d, fname, oname, **kwargs):
|
30
|
+
return _h5_read_array_generic(FixedSizeArray, h5d, fname, oname, **kwargs)
|
31
|
+
|
32
|
+
|
33
|
+
def _h5_read_array_of_equalsized_arrays(h5d, fname, oname, **kwargs):
|
34
|
+
return _h5_read_array_generic(ArrayOfEqualSizedArrays, h5d, fname, oname, **kwargs)
|
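
Note that `_h5_read_array_generic` indexes `kwargs["obj_buf"]` directly, so every caller has to pass `obj_buf` explicitly, even when it is `None`. A hedged sketch of an internal call, assuming `_h5_read_array` is re-exported from `lgdo.lh5._serializers` like its siblings, and a hypothetical LH5 file `f.lh5` holding an `Array`-typed dataset `evt/energy`:

```python
import h5py

from lgdo.lh5._serializers import _h5_read_array

with h5py.File("f.lh5", "r") as f:
    # the refactored serializers receive low-level h5py object ids
    h5d = h5py.h5o.open(f["evt"].id, b"energy")
    try:
        # obj_buf must appear in kwargs, even as None
        arr, n_rows = _h5_read_array(h5d, "f.lh5", "evt/energy", obj_buf=None)
    finally:
        h5d.close()
```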
{legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/composite.py
RENAMED
@@ -13,6 +13,7 @@ from ....types import (
     ArrayOfEncodedEqualSizedArrays,
     ArrayOfEqualSizedArrays,
     FixedSizeArray,
+    Histogram,
     Scalar,
     Struct,
     Table,
@@ -22,7 +23,6 @@ from ....types import (
 )
 from ... import datatype as dtypeutils
 from ...exceptions import LH5DecodeError
-from ...utils import read_n_rows
 from . import utils
 from .array import (
     _h5_read_array,
@@ -41,6 +41,8 @@ log = logging.getLogger(__name__)
 
 def _h5_read_lgdo(
     h5o,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -50,69 +52,23 @@ def _h5_read_lgdo(
     obj_buf_start=0,
     decompress=True,
 ):
-    # Handle list-of-files recursively
-    if not isinstance(h5o, (h5py.Group, h5py.Dataset)):
-        lh5_objs = list(h5o)
-        n_rows_read = 0
-
-        for i, _h5o in enumerate(lh5_objs):
-            if isinstance(idx, list) and len(idx) > 0 and not np.isscalar(idx[0]):
-                # a list of lists: must be one per file
-                idx_i = idx[i]
-            elif idx is not None:
-                # make idx a proper tuple if it's not one already
-                if not (isinstance(idx, tuple) and len(idx) == 1):
-                    idx = (idx,)
-                # idx is a long continuous array
-                n_rows_i = read_n_rows(_h5o)
-                # find the length of the subset of idx that contains indices
-                # that are less than n_rows_i
-                n_rows_to_read_i = bisect.bisect_left(idx[0], n_rows_i)
-                # now split idx into idx_i and the remainder
-                idx_i = (idx[0][:n_rows_to_read_i],)
-                idx = (idx[0][n_rows_to_read_i:] - n_rows_i,)
-            else:
-                idx_i = None
-            n_rows_i = n_rows - n_rows_read
-
-            obj_buf, n_rows_read_i = _h5_read_lgdo(
-                _h5o,
-                start_row=start_row,
-                n_rows=n_rows_i,
-                idx=idx_i,
-                use_h5idx=use_h5idx,
-                field_mask=field_mask,
-                obj_buf=obj_buf,
-                obj_buf_start=obj_buf_start,
-                decompress=decompress,
-            )
-
-            n_rows_read += n_rows_read_i
-            if n_rows_read >= n_rows or obj_buf is None:
-                return obj_buf, n_rows_read
-            start_row = 0
-            obj_buf_start += n_rows_read_i
-
-        return obj_buf, n_rows_read
-
     log.debug(
-        f"reading {h5o.file.filename}:{h5o.name}[{start_row}:{n_rows}], decompress = {decompress}, "
+        f"reading {fname}:{oname}[{start_row}:{n_rows}], decompress = {decompress}, "
         + (f" with field mask {field_mask}" if field_mask else "")
     )
 
-    if not (isinstance(idx, tuple) and len(idx) == 1) and idx is not None:
-        idx = (idx,)
-
+    attrs = utils.read_attrs(h5o, fname, oname)
     try:
-        lgdotype = dtypeutils.datatype(h5o.attrs["datatype"])
+        lgdotype = dtypeutils.datatype(attrs["datatype"])
     except KeyError as e:
         msg = "dataset not in file or missing 'datatype' attribute"
-        raise LH5DecodeError(msg, h5o) from e
+        raise LH5DecodeError(msg, fname, oname) from e
 
     if lgdotype is Scalar:
         return _h5_read_scalar(
             h5o,
+            fname,
+            oname,
             obj_buf=obj_buf,
         )
 
@@ -124,7 +80,7 @@ def _h5_read_lgdo(
     if len(field_mask) > 0:
         default = not field_mask[next(iter(field_mask.keys()))]
         field_mask = defaultdict(lambda: default, field_mask)
-    elif isinstance(field_mask, (list, tuple)):
+    elif isinstance(field_mask, (list, tuple, set)):
         field_mask = defaultdict(bool, {field: True for field in field_mask})
     elif not isinstance(field_mask, defaultdict):
         msg = "bad field_mask type"
@@ -133,6 +89,8 @@ def _h5_read_lgdo(
     if lgdotype is Struct:
         return _h5_read_struct(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -144,20 +102,37 @@ def _h5_read_lgdo(
     # Below here is all array-like types. So trim idx if needed
     if idx is not None:
         # check if idx is just an ordered list of the integers if so can ignore
-        if (idx[0] == np.arange(0, len(idx[0]), 1)).all():
-            if n_rows > len(idx[0]):
-                n_rows = len(idx[0])
+        if (idx == np.arange(0, len(idx), 1)).all():
+            if n_rows > len(idx):
+                n_rows = len(idx)
             idx = None
         else:
             # chop off indices < start_row
-            i_first_valid = bisect.bisect_left(idx[0], start_row)
-            idxa = idx[0][i_first_valid:]
+            i_first_valid = bisect.bisect_left(idx, start_row)
+            idxa = idx[i_first_valid:]
             # don't readout more than n_rows indices
-            idx = (idxa[:n_rows],)  # works even if n_rows > len(idxa)
+            idx = idxa[:n_rows]  # works even if n_rows > len(idxa)
 
     if lgdotype is Table:
         return _h5_read_table(
             h5o,
+            fname,
+            oname,
+            start_row=start_row,
+            n_rows=n_rows,
+            idx=idx,
+            use_h5idx=use_h5idx,
+            field_mask=field_mask,
+            obj_buf=obj_buf,
+            obj_buf_start=obj_buf_start,
+            decompress=decompress,
+        )
+
+    if lgdotype is Histogram:
+        return _h5_read_histogram(
+            h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -171,6 +146,8 @@ def _h5_read_lgdo(
     if lgdotype is ArrayOfEncodedEqualSizedArrays:
         return _h5_read_array_of_encoded_equalsized_arrays(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -183,6 +160,8 @@ def _h5_read_lgdo(
     if lgdotype is VectorOfEncodedVectors:
         return _h5_read_vector_of_encoded_vectors(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -195,6 +174,8 @@ def _h5_read_lgdo(
     if lgdotype is VectorOfVectors:
         return _h5_read_vector_of_vectors(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -206,6 +187,8 @@ def _h5_read_lgdo(
     if lgdotype is FixedSizeArray:
         return _h5_read_fixedsize_array(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -217,6 +200,8 @@ def _h5_read_lgdo(
     if lgdotype is ArrayOfEqualSizedArrays:
         return _h5_read_array_of_equalsized_arrays(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -228,6 +213,8 @@ def _h5_read_lgdo(
     if lgdotype is Array:
         return _h5_read_array(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -237,11 +224,13 @@ def _h5_read_lgdo(
     )
 
     msg = f"no rule to decode {lgdotype.__name__} from LH5"
-    raise LH5DecodeError(msg, h5o)
+    raise LH5DecodeError(msg, fname, oname)
 
 
 def _h5_read_struct(
     h5g,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -254,7 +243,7 @@ def _h5_read_struct(
     # table... Maybe should emit a warning? Or allow them to be
     # dicts keyed by field name?
 
-    attrs = dict(h5g.attrs)
+    attrs = utils.read_attrs(h5g, fname, oname)
 
     # determine fields to be read out
     all_fields = dtypeutils.get_struct_fields(attrs["datatype"])
@@ -272,20 +261,26 @@ def _h5_read_struct(
     for field in selected_fields:
         # support for integer keys
        field_key = int(field) if attrs.get("int_keys") else str(field)
+        h5o = h5py.h5o.open(h5g, field.encode("utf-8"))
         obj_dict[field_key], _ = _h5_read_lgdo(
-            h5g[field],
+            h5o,
+            fname,
+            f"{oname}/{field}",
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
             use_h5idx=use_h5idx,
             decompress=decompress,
         )
+        h5o.close()
 
     return Struct(obj_dict=obj_dict, attrs=attrs), 1
 
 
 def _h5_read_table(
     h5g,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -297,9 +292,9 @@ def _h5_read_table(
 ):
     if obj_buf is not None and not isinstance(obj_buf, Table):
         msg = "provided object buffer is not a Table"
-        raise LH5DecodeError(msg, h5g)
+        raise LH5DecodeError(msg, fname, oname)
 
-    attrs = dict(h5g.attrs)
+    attrs = utils.read_attrs(h5g, fname, oname)
 
     # determine fields to be read out
     all_fields = dtypeutils.get_struct_fields(attrs["datatype"])
@@ -320,12 +315,15 @@ def _h5_read_table(
         if obj_buf is not None:
             if not isinstance(obj_buf, Table) or field not in obj_buf:
                 msg = "provided object buffer is not a Table or columns are missing"
-                raise LH5DecodeError(msg, h5g)
+                raise LH5DecodeError(msg, fname, oname)
 
             fld_buf = obj_buf[field]
 
+        h5o = h5py.h5o.open(h5g, field.encode("utf-8"))
         col_dict[field], n_rows_read = _h5_read_lgdo(
-            h5g[field],
+            h5o,
+            fname,
+            f"{oname}/{field}",
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -334,6 +332,7 @@ def _h5_read_table(
             obj_buf_start=obj_buf_start,
             decompress=decompress,
         )
+        h5o.close()
 
     if obj_buf is not None and obj_buf_start + n_rows_read > len(obj_buf):
         obj_buf.resize(obj_buf_start + n_rows_read)
@@ -345,12 +344,12 @@ def _h5_read_table(
         n_rows_read = rows_read[0]
     else:
         n_rows_read = 0
-        log.warning(f"Table '{h5g.name}' has no fields specified by {field_mask=}")
+        log.warning(f"Table '{oname}' has no fields specified by {field_mask=}")
 
     for n in rows_read[1:]:
         if n != n_rows_read:
             log.warning(
-                f"Table '{h5g.name}' got strange n_rows_read = {n}, "
+                f"Table '{oname}' got strange n_rows_read = {n}, "
                 "{n_rows_read} was expected ({rows_read})"
             )
 
@@ -382,6 +381,59 @@ def _h5_read_table(
         obj_buf.loc = obj_buf_start + n_rows_read
 
     # check attributes
-    utils.check_obj_buf_attrs(obj_buf.attrs, attrs, h5g)
+    utils.check_obj_buf_attrs(obj_buf.attrs, attrs, fname, oname)
 
     return obj_buf, n_rows_read
+
+
+def _h5_read_histogram(
+    h5g,
+    fname,
+    oname,
+    start_row=0,
+    n_rows=sys.maxsize,
+    idx=None,
+    use_h5idx=False,
+    field_mask=None,
+    obj_buf=None,
+    obj_buf_start=0,
+    decompress=True,
+):
+    if obj_buf is not None or obj_buf_start != 0:
+        msg = "reading a histogram into an existing object buffer is not supported"
+        raise LH5DecodeError(msg, fname, oname)
+
+    struct, n_rows_read = _h5_read_struct(
+        h5g,
+        fname,
+        oname,
+        start_row=start_row,
+        n_rows=n_rows,
+        idx=idx,
+        use_h5idx=use_h5idx,
+        field_mask=field_mask,
+        decompress=decompress,
+    )
+
+    binning = []
+    for _, a in struct.binning.items():
+        be = a.binedges
+        if isinstance(be, Struct):
+            b = (None, be.first.value, be.last.value, be.step.value, a.closedleft.value)
+        elif isinstance(be, Array):
+            b = (be, None, None, None, a.closedleft.value)
+        else:
+            msg = "unexpected binning of histogram"
+            raise LH5DecodeError(msg, fname, oname)
+        ax = Histogram.Axis(*b)
+        # copy attrs to "clone" the "whole" struct.
+        ax.attrs = a.getattrs(datatype=True)
+        ax["binedges"].attrs = be.getattrs(datatype=True)
+        binning.append(ax)
+
+    isdensity = struct.isdensity.value
+    weights = struct.weights
+    attrs = struct.getattrs(datatype=True)
+    histogram = Histogram(weights, binning, isdensity, attrs=attrs)
+
+    return histogram, n_rows_read
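
`_h5_read_histogram` first reads the object as a plain `Struct` and then re-assembles it, which implies the LH5 layout: a struct with members `weights`, `isdensity`, and `binning`, where each axis sub-struct carries `closedleft` and `binedges` (either a `{first, last, step}` struct or an explicit edge `Array`). A hedged round-trip sketch, assuming the top-level `lgdo.lh5.read`/`lgdo.lh5.write` helpers accept the new type:

```python
import numpy as np

from lgdo import Histogram, lh5

# a 1D histogram with 10 regular bins on [0, 10)
h = Histogram(
    np.random.poisson(5, size=10), [Histogram.Axis(None, 0, 10, 1, True)], False
)
lh5.write(h, "h", "histos.lh5", wo_mode="overwrite_file")

# the read dispatches on the stored datatype to _h5_read_histogram;
# passing obj_buf here would raise LH5DecodeError (see above)
h2 = lh5.read("h", "histos.lh5")
```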
{legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/encoded.py
RENAMED
@@ -3,6 +3,8 @@ from __future__ import annotations
 import logging
 import sys
 
+import h5py
+
 from .... import compression as compress
 from ....types import (
     ArrayOfEncodedEqualSizedArrays,
@@ -13,6 +15,7 @@ from .array import (
     _h5_read_array,
 )
 from .scalar import _h5_read_scalar
+from .utils import read_attrs
 from .vector_of_vectors import _h5_read_vector_of_vectors
 
 log = logging.getLogger(__name__)
@@ -20,21 +23,29 @@ log = logging.getLogger(__name__)
 
 def _h5_read_array_of_encoded_equalsized_arrays(
     h5g,
+    fname,
+    oname,
     **kwargs,
 ):
-    return _h5_read_encoded_array(ArrayOfEncodedEqualSizedArrays, h5g, **kwargs)
+    return _h5_read_encoded_array(
+        ArrayOfEncodedEqualSizedArrays, h5g, fname, oname, **kwargs
+    )
 
 
 def _h5_read_vector_of_encoded_vectors(
     h5g,
+    fname,
+    oname,
     **kwargs,
 ):
-    return _h5_read_encoded_array(VectorOfEncodedVectors, h5g, **kwargs)
+    return _h5_read_encoded_array(VectorOfEncodedVectors, h5g, fname, oname, **kwargs)
 
 
 def _h5_read_encoded_array(
     lgdotype,
     h5g,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -45,11 +56,11 @@ def _h5_read_encoded_array(
 ):
     if lgdotype not in (ArrayOfEncodedEqualSizedArrays, VectorOfEncodedVectors):
         msg = f"unsupported read of encoded type {lgdotype.__name__}"
-        raise LH5DecodeError(msg, h5g)
+        raise LH5DecodeError(msg, fname, oname)
 
     if not decompress and obj_buf is not None and not isinstance(obj_buf, lgdotype):
         msg = f"object buffer is not a {lgdotype.__name__}"
-        raise LH5DecodeError(msg, h5g)
+        raise LH5DecodeError(msg, fname, oname)
 
     # read out decoded_size, either a Scalar or an Array
     decoded_size_buf = encoded_data_buf = None
@@ -58,8 +69,11 @@ def _h5_read_encoded_array(
         encoded_data_buf = obj_buf.encoded_data
 
     if lgdotype is VectorOfEncodedVectors:
+        h5o = h5py.h5o.open(h5g, b"decoded_size")
         decoded_size, _ = _h5_read_array(
-            h5g["decoded_size"],
+            h5o,
+            fname,
+            f"{oname}/decoded_size",
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -67,16 +81,23 @@ def _h5_read_encoded_array(
             obj_buf=None if decompress else decoded_size_buf,
             obj_buf_start=0 if decompress else obj_buf_start,
         )
+        h5o.close()
     else:
+        h5o = h5py.h5o.open(h5g, b"decoded_size")
         decoded_size, _ = _h5_read_scalar(
-            h5g["decoded_size"],
+            h5o,
+            fname,
+            f"{oname}/decoded_size",
             obj_buf=None if decompress else decoded_size_buf,
         )
+        h5o.close()
 
     # read out encoded_data, a VectorOfVectors
+    h5o = h5py.h5o.open(h5g, b"encoded_data")
     encoded_data, n_rows_read = _h5_read_vector_of_vectors(
-        h5g["encoded_data"],
+        h5o,
+        fname,
+        f"{oname}/encoded_data",
         start_row=start_row,
        n_rows=n_rows,
         idx=idx,
@@ -84,6 +105,7 @@ def _h5_read_encoded_array(
         obj_buf=None if decompress else encoded_data_buf,
         obj_buf_start=0 if decompress else obj_buf_start,
     )
+    h5o.close()
 
     # return the still encoded data in the buffer object, if there
     if obj_buf is not None and not decompress:
@@ -93,7 +115,7 @@ def _h5_read_encoded_array(
     rawdata = lgdotype(
         encoded_data=encoded_data,
         decoded_size=decoded_size,
-        attrs=dict(h5g.attrs),
+        attrs=read_attrs(h5g, fname, oname),
     )
 
     # already return if no decompression is requested
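
The `decompress` flag threaded through `_h5_read_encoded_array` controls whether encoded data is decoded on read, and it is exposed on the high-level reader. A hedged usage sketch, assuming a hypothetical file `raw.lh5` whose `raw/waveform/values` column was written with a waveform compression codec:

```python
from lgdo import lh5

# default decompress=True: encoded columns come back as ordinary
# ArrayOfEqualSizedArrays / VectorOfVectors
wfs = lh5.read("raw/waveform", "raw.lh5")

# decompress=False: the still-encoded ArrayOfEncodedEqualSizedArrays or
# VectorOfEncodedVectors is returned, matching the buffer path above
enc = lh5.read("raw/waveform", "raw.lh5", decompress=False)
```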
|