legend-pydataobj 1.9.0__tar.gz → 1.10.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/PKG-INFO +2 -2
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/pyproject.toml +1 -1
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/PKG-INFO +2 -2
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/requires.txt +1 -1
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/_version.py +2 -2
- legend_pydataobj-1.10.0/src/lgdo/lh5/_serializers/read/array.py +34 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/composite.py +68 -78
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/encoded.py +31 -9
- legend_pydataobj-1.10.0/src/lgdo/lh5/_serializers/read/ndarray.py +117 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/scalar.py +10 -3
- legend_pydataobj-1.10.0/src/lgdo/lh5/_serializers/read/utils.py +35 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/vector_of_vectors.py +35 -13
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/write/array.py +6 -1
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/write/composite.py +14 -5
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/write/scalar.py +6 -1
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/core.py +78 -7
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/exceptions.py +3 -3
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/store.py +101 -11
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/tools.py +1 -1
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/utils.py +13 -2
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/histogram.py +18 -3
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/lh5/test_core.py +6 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/lh5/test_lh5_write.py +11 -1
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_histogram.py +9 -3
- legend_pydataobj-1.9.0/src/lgdo/lh5/_serializers/read/array.py +0 -34
- legend_pydataobj-1.9.0/src/lgdo/lh5/_serializers/read/ndarray.py +0 -103
- legend_pydataobj-1.9.0/src/lgdo/lh5/_serializers/read/utils.py +0 -12
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/LICENSE +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/README.md +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/setup.cfg +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/SOURCES.txt +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/dependency_links.txt +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/entry_points.txt +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/not-zip-safe +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/top_level.txt +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/__init__.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/cli.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/compression/__init__.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/compression/base.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/compression/generic.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/compression/radware.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/compression/utils.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/compression/varlen.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lgdo_utils.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/__init__.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/__init__.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/__init__.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/write/__init__.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/write/vector_of_vectors.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/datatype.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/iterator.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5_store.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/logging.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/__init__.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/array.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/arrayofequalsizedarrays.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/encoded.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/fixedsizearray.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/lgdo.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/scalar.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/struct.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/table.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/vectorofvectors.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/vovutils.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/waveformtable.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/units.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/utils.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/compression/conftest.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/compression/sigcompress/LDQTA_r117_20200110T105115Z_cal_geds_raw-0.dat +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/compression/sigcompress/special-wf-clipped.dat +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/compression/test_compression.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/compression/test_radware_sigcompress.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/compression/test_str2wfcodec.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/compression/test_uleb128_zigzag_diff.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/conftest.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/lh5/conftest.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/lh5/test_lh5_datatype.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/lh5/test_lh5_iterator.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/lh5/test_lh5_store.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/lh5/test_lh5_tools.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/lh5/test_lh5_utils.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/test_cli.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/test_lgdo_utils.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_array.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_arrayofequalsizedarrays.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_encoded.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_fixedsizearray.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_representations.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_scalar.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_struct.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_table.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_table_eval.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_vectorofvectors.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_vovutils.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_waveformtable.py +0 -0
{legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: legend_pydataobj
-Version: 1.9.0
+Version: 1.10.0
 Summary: LEGEND Python Data Objects
 Author: The LEGEND Collaboration
 Maintainer: The LEGEND Collaboration
@@ -700,7 +700,7 @@ License-File: LICENSE
 Requires-Dist: awkward>=2
 Requires-Dist: awkward-pandas
 Requires-Dist: colorlog
-Requires-Dist: h5py>=3.
+Requires-Dist: h5py>=3.10
 Requires-Dist: hdf5plugin
 Requires-Dist: hist
 Requires-Dist: numba!=0.53.*,!=0.54.*
{legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: legend_pydataobj
-Version: 1.9.0
+Version: 1.10.0
 Summary: LEGEND Python Data Objects
 Author: The LEGEND Collaboration
 Maintainer: The LEGEND Collaboration
@@ -700,7 +700,7 @@ License-File: LICENSE
 Requires-Dist: awkward>=2
 Requires-Dist: awkward-pandas
 Requires-Dist: colorlog
-Requires-Dist: h5py>=3.
+Requires-Dist: h5py>=3.10
 Requires-Dist: hdf5plugin
 Requires-Dist: hist
 Requires-Dist: numba!=0.53.*,!=0.54.*
legend_pydataobj-1.10.0/src/lgdo/lh5/_serializers/read/array.py
ADDED
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+import logging
+
+from ....types import Array, ArrayOfEqualSizedArrays, FixedSizeArray
+from . import utils
+from .ndarray import _h5_read_ndarray
+
+log = logging.getLogger(__name__)
+
+
+def _h5_read_array_generic(type_, h5d, fname, oname, **kwargs):
+    nda, attrs, n_rows_to_read = _h5_read_ndarray(h5d, fname, oname, **kwargs)
+
+    obj_buf = kwargs["obj_buf"]
+
+    if obj_buf is None:
+        return type_(nda=nda, attrs=attrs), n_rows_to_read
+
+    utils.check_obj_buf_attrs(obj_buf.attrs, attrs, fname, oname)
+
+    return obj_buf, n_rows_to_read
+
+
+def _h5_read_array(h5d, fname, oname, **kwargs):
+    return _h5_read_array_generic(Array, h5d, fname, oname, **kwargs)
+
+
+def _h5_read_fixedsize_array(h5d, fname, oname, **kwargs):
+    return _h5_read_array_generic(FixedSizeArray, h5d, fname, oname, **kwargs)
+
+
+def _h5_read_array_of_equalsized_arrays(h5d, fname, oname, **kwargs):
+    return _h5_read_array_generic(ArrayOfEqualSizedArrays, h5d, fname, oname, **kwargs)
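For orientation: the rewritten read serializers now take a low-level h5py object plus the file name and object name, which are used only for logging and error reporting, and they expect obj_buf among the keyword arguments. A minimal sketch of driving one directly, assuming a hypothetical file "data.lh5" with an Array at "evt/energy" (normally lgdo.lh5.read handles all of this):

import h5py
from lgdo.lh5._serializers.read.array import _h5_read_array

with h5py.File("data.lh5", "r") as f:
    # low-level handle, obtained the same way the callers in composite.py do
    h5d = h5py.h5o.open(f.id, b"evt/energy")
    obj, n_read = _h5_read_array(h5d, "data.lh5", "evt/energy", obj_buf=None)
    h5d.close()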
{legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/composite.py
RENAMED
@@ -23,7 +23,6 @@ from ....types import (
 )
 from ... import datatype as dtypeutils
 from ...exceptions import LH5DecodeError
-from ...utils import read_n_rows
 from . import utils
 from .array import (
     _h5_read_array,
@@ -42,6 +41,8 @@ log = logging.getLogger(__name__)
 
 def _h5_read_lgdo(
     h5o,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -51,69 +52,23 @@ def _h5_read_lgdo(
     obj_buf_start=0,
     decompress=True,
 ):
-    # Handle list-of-files recursively
-    if not isinstance(h5o, (h5py.Group, h5py.Dataset)):
-        lh5_objs = list(h5o)
-        n_rows_read = 0
-
-        for i, _h5o in enumerate(lh5_objs):
-            if isinstance(idx, list) and len(idx) > 0 and not np.isscalar(idx[0]):
-                # a list of lists: must be one per file
-                idx_i = idx[i]
-            elif idx is not None:
-                # make idx a proper tuple if it's not one already
-                if not (isinstance(idx, tuple) and len(idx) == 1):
-                    idx = (idx,)
-                # idx is a long continuous array
-                n_rows_i = read_n_rows(_h5o)
-                # find the length of the subset of idx that contains indices
-                # that are less than n_rows_i
-                n_rows_to_read_i = bisect.bisect_left(idx[0], n_rows_i)
-                # now split idx into idx_i and the remainder
-                idx_i = (idx[0][:n_rows_to_read_i],)
-                idx = (idx[0][n_rows_to_read_i:] - n_rows_i,)
-            else:
-                idx_i = None
-            n_rows_i = n_rows - n_rows_read
-
-            obj_buf, n_rows_read_i = _h5_read_lgdo(
-                _h5o,
-                start_row=start_row,
-                n_rows=n_rows_i,
-                idx=idx_i,
-                use_h5idx=use_h5idx,
-                field_mask=field_mask,
-                obj_buf=obj_buf,
-                obj_buf_start=obj_buf_start,
-                decompress=decompress,
-            )
-
-            n_rows_read += n_rows_read_i
-            if n_rows_read >= n_rows or obj_buf is None:
-                return obj_buf, n_rows_read
-            start_row = 0
-            obj_buf_start += n_rows_read_i
-
-        return obj_buf, n_rows_read
-
     log.debug(
-        f"reading {
+        f"reading {fname}:{oname}[{start_row}:{n_rows}], decompress = {decompress}, "
         + (f" with field mask {field_mask}" if field_mask else "")
     )
 
-
-    if not (isinstance(idx, tuple) and len(idx) == 1) and idx is not None:
-        idx = (idx,)
-
+    attrs = utils.read_attrs(h5o, fname, oname)
     try:
-        lgdotype = dtypeutils.datatype(
+        lgdotype = dtypeutils.datatype(attrs["datatype"])
     except KeyError as e:
         msg = "dataset not in file or missing 'datatype' attribute"
-        raise LH5DecodeError(msg,
+        raise LH5DecodeError(msg, fname, oname) from e
 
     if lgdotype is Scalar:
         return _h5_read_scalar(
             h5o,
+            fname,
+            oname,
             obj_buf=obj_buf,
         )
@@ -125,7 +80,7 @@ def _h5_read_lgdo(
         if len(field_mask) > 0:
             default = not field_mask[next(iter(field_mask.keys()))]
         field_mask = defaultdict(lambda: default, field_mask)
-    elif isinstance(field_mask, (list, tuple)):
+    elif isinstance(field_mask, (list, tuple, set)):
         field_mask = defaultdict(bool, {field: True for field in field_mask})
     elif not isinstance(field_mask, defaultdict):
         msg = "bad field_mask type"
@@ -134,6 +89,8 @@ def _h5_read_lgdo(
     if lgdotype is Struct:
         return _h5_read_struct(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -145,20 +102,22 @@ def _h5_read_lgdo(
     # Below here is all array-like types. So trim idx if needed
     if idx is not None:
         # check if idx is just an ordered list of the integers if so can ignore
-        if (idx
-            if n_rows > len(idx
-                n_rows = len(idx
+        if (idx == np.arange(0, len(idx), 1)).all():
+            if n_rows > len(idx):
+                n_rows = len(idx)
             idx = None
         else:
             # chop off indices < start_row
-            i_first_valid = bisect.bisect_left(idx
-            idxa = idx[
+            i_first_valid = bisect.bisect_left(idx, start_row)
+            idxa = idx[i_first_valid:]
             # don't readout more than n_rows indices
-            idx =
+            idx = idxa[:n_rows]  # works even if n_rows > len(idxa)
 
     if lgdotype is Table:
         return _h5_read_table(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -172,6 +131,8 @@ def _h5_read_lgdo(
     if lgdotype is Histogram:
         return _h5_read_histogram(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -185,6 +146,8 @@ def _h5_read_lgdo(
     if lgdotype is ArrayOfEncodedEqualSizedArrays:
         return _h5_read_array_of_encoded_equalsized_arrays(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -197,6 +160,8 @@ def _h5_read_lgdo(
     if lgdotype is VectorOfEncodedVectors:
         return _h5_read_vector_of_encoded_vectors(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -209,6 +174,8 @@ def _h5_read_lgdo(
     if lgdotype is VectorOfVectors:
         return _h5_read_vector_of_vectors(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -220,6 +187,8 @@ def _h5_read_lgdo(
     if lgdotype is FixedSizeArray:
         return _h5_read_fixedsize_array(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -231,6 +200,8 @@ def _h5_read_lgdo(
     if lgdotype is ArrayOfEqualSizedArrays:
         return _h5_read_array_of_equalsized_arrays(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -242,6 +213,8 @@ def _h5_read_lgdo(
     if lgdotype is Array:
         return _h5_read_array(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -251,11 +224,13 @@ def _h5_read_lgdo(
     )
 
     msg = f"no rule to decode {lgdotype.__name__} from LH5"
-    raise LH5DecodeError(msg,
+    raise LH5DecodeError(msg, fname, oname)
 
 
 def _h5_read_struct(
     h5g,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -268,7 +243,7 @@ def _h5_read_struct(
     # table... Maybe should emit a warning? Or allow them to be
     # dicts keyed by field name?
 
-    attrs =
+    attrs = utils.read_attrs(h5g, fname, oname)
 
     # determine fields to be read out
     all_fields = dtypeutils.get_struct_fields(attrs["datatype"])
@@ -286,20 +261,26 @@ def _h5_read_struct(
     for field in selected_fields:
         # support for integer keys
        field_key = int(field) if attrs.get("int_keys") else str(field)
+        h5o = h5py.h5o.open(h5g, field.encode("utf-8"))
         obj_dict[field_key], _ = _h5_read_lgdo(
-
+            h5o,
+            fname,
+            f"{oname}/{field}",
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
             use_h5idx=use_h5idx,
             decompress=decompress,
         )
+        h5o.close()
 
     return Struct(obj_dict=obj_dict, attrs=attrs), 1
 
 
 def _h5_read_table(
     h5g,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -311,9 +292,9 @@ def _h5_read_table(
 ):
     if obj_buf is not None and not isinstance(obj_buf, Table):
         msg = "provided object buffer is not a Table"
-        raise LH5DecodeError(msg,
+        raise LH5DecodeError(msg, fname, oname)
 
-    attrs =
+    attrs = utils.read_attrs(h5g, fname, oname)
 
     # determine fields to be read out
     all_fields = dtypeutils.get_struct_fields(attrs["datatype"])
@@ -334,12 +315,15 @@ def _h5_read_table(
         if obj_buf is not None:
             if not isinstance(obj_buf, Table) or field not in obj_buf:
                 msg = "provided object buffer is not a Table or columns are missing"
-                raise LH5DecodeError(msg,
+                raise LH5DecodeError(msg, fname, oname)
 
             fld_buf = obj_buf[field]
 
+        h5o = h5py.h5o.open(h5g, field.encode("utf-8"))
         col_dict[field], n_rows_read = _h5_read_lgdo(
-
+            h5o,
+            fname,
+            f"{oname}/{field}",
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -348,6 +332,7 @@ def _h5_read_table(
             obj_buf_start=obj_buf_start,
             decompress=decompress,
         )
+        h5o.close()
 
         if obj_buf is not None and obj_buf_start + n_rows_read > len(obj_buf):
             obj_buf.resize(obj_buf_start + n_rows_read)
@@ -359,12 +344,12 @@ def _h5_read_table(
         n_rows_read = rows_read[0]
     else:
         n_rows_read = 0
-        log.warning(f"Table '{
+        log.warning(f"Table '{oname}' has no fields specified by {field_mask=}")
 
     for n in rows_read[1:]:
         if n != n_rows_read:
             log.warning(
-                f"Table '{
+                f"Table '{oname}' got strange n_rows_read = {n}, "
                 "{n_rows_read} was expected ({rows_read})"
             )
@@ -396,13 +381,15 @@ def _h5_read_table(
         obj_buf.loc = obj_buf_start + n_rows_read
 
         # check attributes
-        utils.check_obj_buf_attrs(obj_buf.attrs, attrs,
+        utils.check_obj_buf_attrs(obj_buf.attrs, attrs, fname, oname)
 
     return obj_buf, n_rows_read
 
 
 def _h5_read_histogram(
     h5g,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -414,17 +401,20 @@ def _h5_read_histogram(
 ):
     if obj_buf is not None or obj_buf_start != 0:
         msg = "reading a histogram into an existing object buffer is not supported"
-        raise LH5DecodeError(msg,
+        raise LH5DecodeError(msg, fname, oname)
 
     struct, n_rows_read = _h5_read_struct(
         h5g,
-
-
-
-
-
-
+        fname,
+        oname,
+        start_row=start_row,
+        n_rows=n_rows,
+        idx=idx,
+        use_h5idx=use_h5idx,
+        field_mask=field_mask,
+        decompress=decompress,
     )
+
     binning = []
     for _, a in struct.binning.items():
         be = a.binedges
@@ -434,7 +424,7 @@ def _h5_read_histogram(
             b = (be, None, None, None, a.closedleft.value)
         else:
             msg = "unexpected binning of histogram"
-            raise LH5DecodeError(msg,
+            raise LH5DecodeError(msg, fname, oname)
         ax = Histogram.Axis(*b)
         # copy attrs to "clone" the "whole" struct.
         ax.attrs = a.getattrs(datatype=True)
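Note that the field_mask branch in _h5_read_lgdo now also accepts a set. A standalone sketch of that normalization logic, lifted from the hunk above into a hypothetical helper (_normalize_field_mask is not part of the package):

from collections import defaultdict

def _normalize_field_mask(field_mask):
    # a non-empty dict sets the default to the opposite of its first value
    if isinstance(field_mask, dict) and len(field_mask) > 0:
        default = not field_mask[next(iter(field_mask.keys()))]
        return defaultdict(lambda: default, field_mask)
    # list, tuple and (new in 1.10.0) set enable only the named fields
    if isinstance(field_mask, (list, tuple, set)):
        return defaultdict(bool, {field: True for field in field_mask})
    if isinstance(field_mask, defaultdict):
        return field_mask
    msg = "bad field_mask type"
    raise ValueError(msg)

assert _normalize_field_mask({"energy"})["energy"] is True
assert _normalize_field_mask(["energy"])["timestamp"] is False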
{legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/encoded.py
RENAMED
@@ -3,6 +3,8 @@ from __future__ import annotations
 import logging
 import sys
 
+import h5py
+
 from .... import compression as compress
 from ....types import (
     ArrayOfEncodedEqualSizedArrays,
@@ -13,6 +15,7 @@ from .array import (
     _h5_read_array,
 )
 from .scalar import _h5_read_scalar
+from .utils import read_attrs
 from .vector_of_vectors import _h5_read_vector_of_vectors
 
 log = logging.getLogger(__name__)
@@ -20,21 +23,29 @@ log = logging.getLogger(__name__)
 
 def _h5_read_array_of_encoded_equalsized_arrays(
     h5g,
+    fname,
+    oname,
     **kwargs,
 ):
-    return _h5_read_encoded_array(
+    return _h5_read_encoded_array(
+        ArrayOfEncodedEqualSizedArrays, h5g, fname, oname, **kwargs
+    )
 
 
 def _h5_read_vector_of_encoded_vectors(
     h5g,
+    fname,
+    oname,
     **kwargs,
 ):
-    return _h5_read_encoded_array(VectorOfEncodedVectors, h5g, **kwargs)
+    return _h5_read_encoded_array(VectorOfEncodedVectors, h5g, fname, oname, **kwargs)
 
 
 def _h5_read_encoded_array(
     lgdotype,
     h5g,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -45,11 +56,11 @@ def _h5_read_encoded_array(
 ):
     if lgdotype not in (ArrayOfEncodedEqualSizedArrays, VectorOfEncodedVectors):
         msg = f"unsupported read of encoded type {lgdotype.__name__}"
-        raise LH5DecodeError(msg,
+        raise LH5DecodeError(msg, fname, oname)
 
     if not decompress and obj_buf is not None and not isinstance(obj_buf, lgdotype):
         msg = f"object buffer is not a {lgdotype.__name__}"
-        raise LH5DecodeError(msg,
+        raise LH5DecodeError(msg, fname, oname)
 
     # read out decoded_size, either a Scalar or an Array
     decoded_size_buf = encoded_data_buf = None
@@ -58,8 +69,11 @@ def _h5_read_encoded_array(
         encoded_data_buf = obj_buf.encoded_data
 
     if lgdotype is VectorOfEncodedVectors:
+        h5o = h5py.h5o.open(h5g, b"decoded_size")
         decoded_size, _ = _h5_read_array(
-
+            h5o,
+            fname,
+            f"{oname}/decoded_size",
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -67,16 +81,23 @@ def _h5_read_encoded_array(
             obj_buf=None if decompress else decoded_size_buf,
             obj_buf_start=0 if decompress else obj_buf_start,
         )
-
+        h5o.close()
     else:
+        h5o = h5py.h5o.open(h5g, b"decoded_size")
         decoded_size, _ = _h5_read_scalar(
-
+            h5o,
+            fname,
+            f"{oname}/decoded_size",
             obj_buf=None if decompress else decoded_size_buf,
         )
+        h5o.close()
 
     # read out encoded_data, a VectorOfVectors
+    h5o = h5py.h5o.open(h5g, b"encoded_data")
     encoded_data, n_rows_read = _h5_read_vector_of_vectors(
-
+        h5o,
+        fname,
+        f"{oname}/encoded_data",
        start_row=start_row,
        n_rows=n_rows,
        idx=idx,
@@ -84,6 +105,7 @@ def _h5_read_encoded_array(
         obj_buf=None if decompress else encoded_data_buf,
         obj_buf_start=0 if decompress else obj_buf_start,
     )
+    h5o.close()
 
     # return the still encoded data in the buffer object, if there
     if obj_buf is not None and not decompress:
@@ -93,7 +115,7 @@ def _h5_read_encoded_array(
     rawdata = lgdotype(
         encoded_data=encoded_data,
         decoded_size=decoded_size,
-        attrs=
+        attrs=read_attrs(h5g, fname, oname),
     )
 
     # already return if no decompression is requested
legend_pydataobj-1.10.0/src/lgdo/lh5/_serializers/read/ndarray.py
ADDED
@@ -0,0 +1,117 @@
+from __future__ import annotations
+
+import logging
+import sys
+from bisect import bisect_left
+
+import h5py
+import numpy as np
+
+from ....types import Array
+from ... import datatype
+from ...exceptions import LH5DecodeError
+from .utils import read_attrs
+
+log = logging.getLogger(__name__)
+
+
+def _h5_read_ndarray(
+    h5d,
+    fname,
+    oname,
+    start_row=0,
+    n_rows=sys.maxsize,
+    idx=None,
+    use_h5idx=False,
+    obj_buf=None,
+    obj_buf_start=0,
+):
+    if obj_buf is not None and not isinstance(obj_buf, Array):
+        msg = "object buffer is not an Array"
+        raise LH5DecodeError(msg, fname, oname)
+
+    # compute the number of rows to read
+    # we culled idx above for start_row and n_rows, now we have to apply
+    # the constraint of the length of the dataset
+    try:
+        fspace = h5d.get_space()
+        ds_n_rows = fspace.shape[0]
+    except AttributeError as e:
+        msg = "does not seem to be an HDF5 dataset"
+        raise LH5DecodeError(msg, fname, oname) from e
+
+    if idx is not None:
+        if len(idx) > 0 and idx[-1] >= ds_n_rows:
+            log.warning("idx indexed past the end of the array in the file. Culling...")
+            n_rows_to_read = bisect_left(idx[0], ds_n_rows)
+            idx = (idx[:n_rows_to_read],)
+            if len(idx) == 0:
+                log.warning("idx empty after culling.")
+        n_rows_to_read = len(idx)
+    else:
+        n_rows_to_read = ds_n_rows - start_row
+    if n_rows_to_read > n_rows:
+        n_rows_to_read = n_rows
+
+    if idx is None:
+        fspace.select_hyperslab(
+            (start_row,) + (0,) * (h5d.rank - 1),
+            (1,) * h5d.rank,
+            None,
+            (n_rows_to_read,) + fspace.shape[1:],
+        )
+    elif use_h5idx:
+        # Note that h5s will automatically merge adjacent elements into a range
+        fspace.select_none()
+        for i in idx:
+            fspace.select_hyperslab(
+                (i,) + (0,) * (h5d.rank - 1),
+                (1,) * h5d.rank,
+                None,
+                (1,) + fspace.shape[1:],
+                h5py.h5s.SELECT_OR,
+            )
+
+    # Now read the array
+    if obj_buf is not None and n_rows_to_read > 0:
+        buf_size = obj_buf_start + n_rows_to_read
+        if len(obj_buf) < buf_size:
+            obj_buf.resize(buf_size)
+        dest_sel = np.s_[obj_buf_start:buf_size]
+
+        if idx is None or use_h5idx:
+            mspace = h5py.h5s.create_simple(obj_buf.nda.shape)
+            mspace.select_hyperslab(
+                (obj_buf_start,) + (0,) * (h5d.rank - 1),
+                (1,) * h5d.rank,
+                None,
+                (n_rows_to_read,) + fspace.shape[1:],
+            )
+            h5d.read(mspace, fspace, obj_buf.nda)
+        else:
+            tmp = np.empty(fspace.shape, h5d.dtype)
+            h5d.read(fspace, fspace, tmp)
+            obj_buf.nda[dest_sel, ...] = tmp[idx, ...]
+        nda = obj_buf.nda
+    elif n_rows == 0:
+        tmp_shape = (0,) + h5d.shape[1:]
+        nda = np.empty(tmp_shape, h5d.dtype)
+    else:
+        mspace = h5py.h5s.create_simple((n_rows_to_read,) + fspace.shape[1:])
+        nda = np.empty(mspace.shape, h5d.dtype)
+        if idx is None or use_h5idx:
+            h5d.read(mspace, fspace, nda)
+        else:
+            tmp = np.empty(fspace.shape, h5d.dtype)
+            h5d.read(fspace, fspace, tmp)
+            nda[:, ...] = tmp[idx, ...]
+
+    # Finally, set attributes and return objects
+    attrs = read_attrs(h5d, fname, oname)
+
+    # special handling for bools
+    # (c and Julia store as uint8 so cast to bool)
+    if datatype.get_nested_datatype_string(attrs["datatype"]) == "bool":
+        nda = nda.astype(np.bool_)
+
+    return (nda, attrs, n_rows_to_read)
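The core of the new module is h5py's low-level hyperslab selection, visible in the select_hyperslab/read calls above. A self-contained sketch of the contiguous branch (idx is None) against a throwaway file, with illustrative names:

import h5py
import numpy as np

with h5py.File("demo.h5", "w") as f:
    f["d"] = np.arange(100.0)

with h5py.File("demo.h5", "r") as f:
    h5d = f["d"].id                    # low-level DatasetID, the h5d argument above
    fspace = h5d.get_space()
    start_row, n_rows_to_read = 10, 5
    # select rows [start_row, start_row + n_rows_to_read) in the file
    fspace.select_hyperslab((start_row,), (1,), None, (n_rows_to_read,))
    # matching in-memory dataspace and destination buffer
    mspace = h5py.h5s.create_simple((n_rows_to_read,))
    nda = np.empty(mspace.shape, h5d.dtype)
    h5d.read(mspace, fspace, nda)
    print(nda)  # [10. 11. 12. 13. 14.]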