legend-pydataobj 1.9.0__tar.gz → 1.10.1__tar.gz
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registry.
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/PKG-INFO +2 -2
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/pyproject.toml +3 -1
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/legend_pydataobj.egg-info/PKG-INFO +2 -2
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/legend_pydataobj.egg-info/requires.txt +1 -1
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/_version.py +2 -2
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/compression/radware.py +8 -16
- legend_pydataobj-1.10.1/src/lgdo/lh5/_serializers/read/array.py +34 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/_serializers/read/composite.py +67 -78
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/_serializers/read/encoded.py +31 -9
- legend_pydataobj-1.10.1/src/lgdo/lh5/_serializers/read/ndarray.py +116 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/_serializers/read/scalar.py +10 -3
- legend_pydataobj-1.10.1/src/lgdo/lh5/_serializers/read/utils.py +174 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/_serializers/read/vector_of_vectors.py +36 -14
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/_serializers/write/array.py +6 -1
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/_serializers/write/composite.py +14 -5
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/_serializers/write/scalar.py +6 -1
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/core.py +81 -7
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/exceptions.py +3 -3
- legend_pydataobj-1.10.1/src/lgdo/lh5/iterator.py +498 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/store.py +116 -12
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/tools.py +1 -1
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/utils.py +29 -44
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/histogram.py +122 -6
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/table.py +2 -2
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/vectorofvectors.py +1 -1
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/compression/conftest.py +1 -1
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/lh5/test_core.py +6 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/lh5/test_lh5_iterator.py +83 -2
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/lh5/test_lh5_store.py +87 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/lh5/test_lh5_write.py +12 -2
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_histogram.py +106 -4
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_vectorofvectors.py +1 -1
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_vovutils.py +1 -1
- legend_pydataobj-1.9.0/src/lgdo/lh5/_serializers/read/array.py +0 -34
- legend_pydataobj-1.9.0/src/lgdo/lh5/_serializers/read/ndarray.py +0 -103
- legend_pydataobj-1.9.0/src/lgdo/lh5/_serializers/read/utils.py +0 -12
- legend_pydataobj-1.9.0/src/lgdo/lh5/iterator.py +0 -314
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/LICENSE +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/README.md +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/setup.cfg +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/legend_pydataobj.egg-info/SOURCES.txt +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/legend_pydataobj.egg-info/dependency_links.txt +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/legend_pydataobj.egg-info/entry_points.txt +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/legend_pydataobj.egg-info/not-zip-safe +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/legend_pydataobj.egg-info/top_level.txt +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/__init__.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/cli.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/compression/__init__.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/compression/base.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/compression/generic.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/compression/utils.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/compression/varlen.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lgdo_utils.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/__init__.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/_serializers/__init__.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/_serializers/read/__init__.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/_serializers/write/__init__.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/_serializers/write/vector_of_vectors.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/datatype.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5_store.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/logging.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/__init__.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/array.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/arrayofequalsizedarrays.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/encoded.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/fixedsizearray.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/lgdo.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/scalar.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/struct.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/vovutils.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/waveformtable.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/units.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/utils.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/compression/sigcompress/LDQTA_r117_20200110T105115Z_cal_geds_raw-0.dat +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/compression/sigcompress/special-wf-clipped.dat +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/compression/test_compression.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/compression/test_radware_sigcompress.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/compression/test_str2wfcodec.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/compression/test_uleb128_zigzag_diff.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/conftest.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/lh5/conftest.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/lh5/test_lh5_datatype.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/lh5/test_lh5_tools.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/lh5/test_lh5_utils.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/test_cli.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/test_lgdo_utils.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_array.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_arrayofequalsizedarrays.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_encoded.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_fixedsizearray.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_representations.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_scalar.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_struct.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_table.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_table_eval.py +0 -0
- {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_waveformtable.py +0 -0
{legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/PKG-INFO
RENAMED
```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: legend_pydataobj
-Version: 1.9.0
+Version: 1.10.1
 Summary: LEGEND Python Data Objects
 Author: The LEGEND Collaboration
 Maintainer: The LEGEND Collaboration
@@ -700,7 +700,7 @@ License-File: LICENSE
 Requires-Dist: awkward>=2
 Requires-Dist: awkward-pandas
 Requires-Dist: colorlog
-Requires-Dist: h5py>=3.
+Requires-Dist: h5py>=3.10
 Requires-Dist: hdf5plugin
 Requires-Dist: hist
 Requires-Dist: numba!=0.53.*,!=0.54.*
```
{legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/pyproject.toml
RENAMED
```diff
@@ -34,7 +34,7 @@ dependencies = [
     "awkward>=2",
     "awkward-pandas",
     "colorlog",
-    "h5py>=3.
+    "h5py>=3.10",
     "hdf5plugin",
     "hist",
     "numba!=0.53.*,!=0.54.*",
@@ -137,6 +137,7 @@ ignore = [
   "PLR2004",  # Magic value used in comparison
   "ISC001",   # Conflicts with formatter
   "PT011",
+  "RUF013",   # complains if you default to None for an asinine reason
 ]
 isort.required-imports = ["from __future__ import annotations"]
 # Uncomment if using a _compat.typing backport
@@ -145,6 +146,7 @@ isort.required-imports = ["from __future__ import annotations"]
 [tool.ruff.lint.per-file-ignores]
 "tests/**" = ["T20"]
 "noxfile.py" = ["T20"]
+"docs/source/notebooks/*" = ["T201", "E402"]
 
 [tool.pylint]
 py-version = "3.8"
```
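For context, `RUF013` is Ruff's implicit-`Optional` check: it fires whenever a parameter is annotated with a plain type but defaulted to `None`. A minimal sketch of the pattern the new ignore tolerates (hypothetical function, for illustration only):

```python
def lookup(table: dict[str, str], key: str, default: str = None):
    # RUF013 would flag `default`: it is annotated `str` but defaults
    # to None (an implicit Optional). With the rule ignored, this
    # style passes the linter unchanged.
    return table.get(key, default)
```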
{legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/legend_pydataobj.egg-info/PKG-INFO
RENAMED
```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: legend_pydataobj
-Version: 1.9.0
+Version: 1.10.1
 Summary: LEGEND Python Data Objects
 Author: The LEGEND Collaboration
 Maintainer: The LEGEND Collaboration
@@ -700,7 +700,7 @@ License-File: LICENSE
 Requires-Dist: awkward>=2
 Requires-Dist: awkward-pandas
 Requires-Dist: colorlog
-Requires-Dist: h5py>=3.
+Requires-Dist: h5py>=3.10
 Requires-Dist: hdf5plugin
 Requires-Dist: hist
 Requires-Dist: numba!=0.53.*,!=0.54.*
```
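All three metadata files record the same substantive change: the h5py requirement floor moves to 3.10. A quick sketch for checking an existing environment against the new floor (a minimal sketch, assuming the `packaging` library is available):

```python
from importlib.metadata import version

from packaging.version import Version

# legend-pydataobj 1.10.1 declares h5py>=3.10
installed = Version(version("h5py"))
if installed < Version("3.10"):
    msg = f"h5py {installed} is too old for legend-pydataobj 1.10.1"
    raise RuntimeError(msg)
```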
{legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/compression/radware.py
RENAMED
```diff
@@ -441,15 +441,11 @@ def _radware_sigcompress_encode(
         while (i < sig_in.size) and (i < j + 48):
             si_i = int16(sig_in[i] + shift)
             si_im1 = int16(sig_in[i - 1] + shift)
-            if max1 < si_i:
-                max1 = si_i
-            if min1 > si_i:
-                min1 = si_i
+            max1 = max(max1, si_i)
+            min1 = min(min1, si_i)
             ds = si_i - si_im1
-            if max2 < ds:
-                max2 = ds
-            if min2 > ds:
-                min2 = ds
+            max2 = max(max2, ds)
+            min2 = min(min2, ds)
             nw += 1
             i += 1
         if max1 - min1 <= max2 - min2:  # use absolute values
@@ -460,15 +456,13 @@ def _radware_sigcompress_encode(
                 i < j + 128
             ):  # FIXME: 128 could be tuned better?
                 si_i = int16(sig_in[i] + shift)
-                if max1 < si_i:
-                    max1 = si_i
+                max1 = max(max1, si_i)
                 dd1 = max1 - min1
                 if min1 > si_i:
                     dd1 = max1 - si_i
                 if dd1 > mask[nb1]:
                     break
-                if min1 > si_i:
-                    min1 = si_i
+                min1 = min(min1, si_i)
                 nw += 1
                 i += 1
         else:  # use difference values
@@ -481,15 +475,13 @@ def _radware_sigcompress_encode(
                 si_i = int16(sig_in[i] + shift)
                 si_im1 = int16(sig_in[i - 1] + shift)
                 ds = si_i - si_im1
-                if max2 < ds:
-                    max2 = ds
+                max2 = max(max2, ds)
                 dd2 = max2 - min2
                 if min2 > ds:
                     dd2 = max2 - ds
                 if dd2 > mask[nb2]:
                     break
-                if min2 > ds:
-                    min2 = ds
+                min2 = min(min2, ds)
                 nw += 1
                 i += 1
 
```
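The radware.py hunks are a behavior-preserving cleanup: each running-extremum update written as an explicit `if` is collapsed into Python's built-in `min`/`max`, which Numba compiles just as well. A standalone sketch of the equivalence (not the library code):

```python
def extrema_explicit(values):
    # 1.9.0 style: one branch per comparison
    lo = hi = values[0]
    for v in values[1:]:
        if hi < v:
            hi = v
        if lo > v:
            lo = v
    return lo, hi


def extrema_builtin(values):
    # 1.10.1 style: same result via min()/max()
    lo = hi = values[0]
    for v in values[1:]:
        hi = max(hi, v)
        lo = min(lo, v)
    return lo, hi


assert extrema_explicit([3, 1, 4, 1, 5]) == extrema_builtin([3, 1, 4, 1, 5]) == (1, 5)
```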
legend_pydataobj-1.10.1/src/lgdo/lh5/_serializers/read/array.py
ADDED
```diff
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+import logging
+
+from ....types import Array, ArrayOfEqualSizedArrays, FixedSizeArray
+from . import utils
+from .ndarray import _h5_read_ndarray
+
+log = logging.getLogger(__name__)
+
+
+def _h5_read_array_generic(type_, h5d, fname, oname, **kwargs):
+    nda, attrs, n_rows_to_read = _h5_read_ndarray(h5d, fname, oname, **kwargs)
+
+    obj_buf = kwargs["obj_buf"]
+
+    if obj_buf is None:
+        return type_(nda=nda, attrs=attrs), n_rows_to_read
+
+    utils.check_obj_buf_attrs(obj_buf.attrs, attrs, fname, oname)
+
+    return obj_buf, n_rows_to_read
+
+
+def _h5_read_array(h5d, fname, oname, **kwargs):
+    return _h5_read_array_generic(Array, h5d, fname, oname, **kwargs)
+
+
+def _h5_read_fixedsize_array(h5d, fname, oname, **kwargs):
+    return _h5_read_array_generic(FixedSizeArray, h5d, fname, oname, **kwargs)
+
+
+def _h5_read_array_of_equalsized_arrays(h5d, fname, oname, **kwargs):
+    return _h5_read_array_generic(ArrayOfEqualSizedArrays, h5d, fname, oname, **kwargs)
```
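The new array.py routes the three ndarray-backed types (`Array`, `FixedSizeArray`, `ArrayOfEqualSizedArrays`) through one generic reader parametrized by the output class, and, like every reader in this release, takes explicit `fname`/`oname` strings so errors and logs can name the offending file and object. The public entry points are unchanged; a minimal usage sketch (assuming the module-level `lh5.read`/`lh5.write` helpers and a scratch file `data.lh5`):

```python
import numpy as np

from lgdo import Array, lh5

# round-trip a small Array through the refactored read path
lh5.write(Array(np.arange(10)), "energies", "data.lh5", wo_mode="overwrite_file")
energies = lh5.read("energies", "data.lh5")
print(energies.nda)  # [0 1 2 3 4 5 6 7 8 9]
```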
{legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/_serializers/read/composite.py
RENAMED
```diff
@@ -23,7 +23,6 @@ from ....types import (
 )
 from ... import datatype as dtypeutils
 from ...exceptions import LH5DecodeError
-from ...utils import read_n_rows
 from . import utils
 from .array import (
     _h5_read_array,
@@ -42,6 +41,8 @@ log = logging.getLogger(__name__)
 
 def _h5_read_lgdo(
     h5o,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -51,69 +52,23 @@ def _h5_read_lgdo(
     obj_buf_start=0,
     decompress=True,
 ):
-    # Handle list-of-files recursively
-    if not isinstance(h5o, (h5py.Group, h5py.Dataset)):
-        lh5_objs = list(h5o)
-        n_rows_read = 0
-
-        for i, _h5o in enumerate(lh5_objs):
-            if isinstance(idx, list) and len(idx) > 0 and not np.isscalar(idx[0]):
-                # a list of lists: must be one per file
-                idx_i = idx[i]
-            elif idx is not None:
-                # make idx a proper tuple if it's not one already
-                if not (isinstance(idx, tuple) and len(idx) == 1):
-                    idx = (idx,)
-                # idx is a long continuous array
-                n_rows_i = read_n_rows(_h5o)
-                # find the length of the subset of idx that contains indices
-                # that are less than n_rows_i
-                n_rows_to_read_i = bisect.bisect_left(idx[0], n_rows_i)
-                # now split idx into idx_i and the remainder
-                idx_i = (idx[0][:n_rows_to_read_i],)
-                idx = (idx[0][n_rows_to_read_i:] - n_rows_i,)
-            else:
-                idx_i = None
-            n_rows_i = n_rows - n_rows_read
-
-            obj_buf, n_rows_read_i = _h5_read_lgdo(
-                _h5o,
-                start_row=start_row,
-                n_rows=n_rows_i,
-                idx=idx_i,
-                use_h5idx=use_h5idx,
-                field_mask=field_mask,
-                obj_buf=obj_buf,
-                obj_buf_start=obj_buf_start,
-                decompress=decompress,
-            )
-
-            n_rows_read += n_rows_read_i
-            if n_rows_read >= n_rows or obj_buf is None:
-                return obj_buf, n_rows_read
-            start_row = 0
-            obj_buf_start += n_rows_read_i
-
-        return obj_buf, n_rows_read
-
     log.debug(
-        f"reading {
+        f"reading {fname}:{oname}[{start_row}:{n_rows}], decompress = {decompress}, "
         + (f" with field mask {field_mask}" if field_mask else "")
     )
 
-
-    if not (isinstance(idx, tuple) and len(idx) == 1) and idx is not None:
-        idx = (idx,)
-
+    attrs = utils.read_attrs(h5o, fname, oname)
     try:
-        lgdotype = dtypeutils.datatype(
+        lgdotype = dtypeutils.datatype(attrs["datatype"])
     except KeyError as e:
         msg = "dataset not in file or missing 'datatype' attribute"
-        raise LH5DecodeError(msg,
+        raise LH5DecodeError(msg, fname, oname) from e
 
     if lgdotype is Scalar:
         return _h5_read_scalar(
             h5o,
+            fname,
+            oname,
             obj_buf=obj_buf,
         )
 
@@ -125,7 +80,7 @@ def _h5_read_lgdo(
     if len(field_mask) > 0:
         default = not field_mask[next(iter(field_mask.keys()))]
         field_mask = defaultdict(lambda: default, field_mask)
-    elif isinstance(field_mask, (list, tuple)):
+    elif isinstance(field_mask, (list, tuple, set)):
        field_mask = defaultdict(bool, {field: True for field in field_mask})
     elif not isinstance(field_mask, defaultdict):
         msg = "bad field_mask type"
@@ -134,6 +89,8 @@ def _h5_read_lgdo(
     if lgdotype is Struct:
         return _h5_read_struct(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -145,20 +102,21 @@ def _h5_read_lgdo(
     # Below here is all array-like types. So trim idx if needed
     if idx is not None:
         # check if idx is just an ordered list of the integers if so can ignore
-        if (idx[0] == np.arange(0, len(idx[0]), 1)).all():
-            if n_rows > len(idx[0]):
-                n_rows = len(idx[0])
+        if (idx == np.arange(0, len(idx), 1)).all():
+            n_rows = min(n_rows, len(idx))
             idx = None
         else:
             # chop off indices < start_row
-            i_first_valid = bisect.bisect_left(idx[0], start_row)
-            idxa = idx[0][i_first_valid:]
+            i_first_valid = bisect.bisect_left(idx, start_row)
+            idxa = idx[i_first_valid:]
             # don't readout more than n_rows indices
-            idx = (idxa[:n_rows],)  # works even if n_rows > len(idxa)
+            idx = idxa[:n_rows]  # works even if n_rows > len(idxa)
 
     if lgdotype is Table:
         return _h5_read_table(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -172,6 +130,8 @@ def _h5_read_lgdo(
     if lgdotype is Histogram:
         return _h5_read_histogram(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -185,6 +145,8 @@ def _h5_read_lgdo(
     if lgdotype is ArrayOfEncodedEqualSizedArrays:
         return _h5_read_array_of_encoded_equalsized_arrays(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -197,6 +159,8 @@ def _h5_read_lgdo(
     if lgdotype is VectorOfEncodedVectors:
         return _h5_read_vector_of_encoded_vectors(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -209,6 +173,8 @@ def _h5_read_lgdo(
     if lgdotype is VectorOfVectors:
         return _h5_read_vector_of_vectors(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -220,6 +186,8 @@ def _h5_read_lgdo(
     if lgdotype is FixedSizeArray:
         return _h5_read_fixedsize_array(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -231,6 +199,8 @@ def _h5_read_lgdo(
     if lgdotype is ArrayOfEqualSizedArrays:
         return _h5_read_array_of_equalsized_arrays(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -242,6 +212,8 @@ def _h5_read_lgdo(
     if lgdotype is Array:
         return _h5_read_array(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -251,11 +223,13 @@ def _h5_read_lgdo(
     )
 
     msg = f"no rule to decode {lgdotype.__name__} from LH5"
-    raise LH5DecodeError(msg,
+    raise LH5DecodeError(msg, fname, oname)
 
 
 def _h5_read_struct(
     h5g,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -268,7 +242,7 @@ def _h5_read_struct(
     # table... Maybe should emit a warning? Or allow them to be
     # dicts keyed by field name?
 
-    attrs =
+    attrs = utils.read_attrs(h5g, fname, oname)
 
     # determine fields to be read out
     all_fields = dtypeutils.get_struct_fields(attrs["datatype"])
@@ -286,20 +260,26 @@ def _h5_read_struct(
     for field in selected_fields:
         # support for integer keys
         field_key = int(field) if attrs.get("int_keys") else str(field)
+        h5o = h5py.h5o.open(h5g, field.encode("utf-8"))
         obj_dict[field_key], _ = _h5_read_lgdo(
-            h5g[field],
+            h5o,
+            fname,
+            f"{oname}/{field}",
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
             use_h5idx=use_h5idx,
             decompress=decompress,
         )
+        h5o.close()
 
     return Struct(obj_dict=obj_dict, attrs=attrs), 1
 
 
 def _h5_read_table(
     h5g,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -311,9 +291,9 @@ def _h5_read_table(
 ):
     if obj_buf is not None and not isinstance(obj_buf, Table):
         msg = "provided object buffer is not a Table"
-        raise LH5DecodeError(msg,
+        raise LH5DecodeError(msg, fname, oname)
 
-    attrs =
+    attrs = utils.read_attrs(h5g, fname, oname)
 
     # determine fields to be read out
     all_fields = dtypeutils.get_struct_fields(attrs["datatype"])
@@ -334,12 +314,15 @@ def _h5_read_table(
         if obj_buf is not None:
             if not isinstance(obj_buf, Table) or field not in obj_buf:
                 msg = "provided object buffer is not a Table or columns are missing"
-                raise LH5DecodeError(msg,
+                raise LH5DecodeError(msg, fname, oname)
 
             fld_buf = obj_buf[field]
 
+        h5o = h5py.h5o.open(h5g, field.encode("utf-8"))
         col_dict[field], n_rows_read = _h5_read_lgdo(
-            h5g[field],
+            h5o,
+            fname,
+            f"{oname}/{field}",
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -348,6 +331,7 @@ def _h5_read_table(
             obj_buf_start=obj_buf_start,
             decompress=decompress,
         )
+        h5o.close()
 
     if obj_buf is not None and obj_buf_start + n_rows_read > len(obj_buf):
         obj_buf.resize(obj_buf_start + n_rows_read)
@@ -359,12 +343,12 @@ def _h5_read_table(
         n_rows_read = rows_read[0]
     else:
         n_rows_read = 0
-        log.warning(f"Table '{
+        log.warning(f"Table '{oname}' has no fields specified by {field_mask=}")
 
     for n in rows_read[1:]:
         if n != n_rows_read:
             log.warning(
-                f"Table '{
+                f"Table '{oname}' got strange n_rows_read = {n}, "
                 "{n_rows_read} was expected ({rows_read})"
             )
 
@@ -396,13 +380,15 @@ def _h5_read_table(
         obj_buf.loc = obj_buf_start + n_rows_read
 
     # check attributes
-    utils.check_obj_buf_attrs(obj_buf.attrs, attrs,
+    utils.check_obj_buf_attrs(obj_buf.attrs, attrs, fname, oname)
 
     return obj_buf, n_rows_read
 
 
 def _h5_read_histogram(
     h5g,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -414,17 +400,20 @@ def _h5_read_histogram(
 ):
     if obj_buf is not None or obj_buf_start != 0:
         msg = "reading a histogram into an existing object buffer is not supported"
-        raise LH5DecodeError(msg,
+        raise LH5DecodeError(msg, fname, oname)
 
     struct, n_rows_read = _h5_read_struct(
         h5g,
-        start_row=start_row,
-        n_rows=n_rows,
-        idx=idx,
-        use_h5idx=use_h5idx,
-        field_mask=field_mask,
-        decompress=decompress,
+        fname,
+        oname,
+        start_row=start_row,
+        n_rows=n_rows,
+        idx=idx,
+        use_h5idx=use_h5idx,
+        field_mask=field_mask,
+        decompress=decompress,
     )
+
     binning = []
     for _, a in struct.binning.items():
         be = a.binedges
@@ -434,7 +423,7 @@ def _h5_read_histogram(
             b = (be, None, None, None, a.closedleft.value)
         else:
             msg = "unexpected binning of histogram"
-            raise LH5DecodeError(msg,
+            raise LH5DecodeError(msg, fname, oname)
         ax = Histogram.Axis(*b)
         # copy attrs to "clone" the "whole" struct.
         ax.attrs = a.getattrs(datatype=True)
```
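Most of the composite.py churn serves two goals: the list-of-files recursion is dropped from `_h5_read_lgdo` (multi-file handling appears to move up into the store and iterator, which grow substantially in this release), and every raise site gains the `fname`/`oname` pair so `LH5DecodeError` can say where decoding failed. A hedged sketch of what calling code can now expect (the exact message wording is illustrative):

```python
from lgdo import lh5
from lgdo.lh5.exceptions import LH5DecodeError

try:
    obj = lh5.read("nonexistent_object", "data.lh5")
except LH5DecodeError as err:
    # the exception is constructed with the file name and object name,
    # so its message can point at data.lh5:nonexistent_object directly
    print(err)
```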
{legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/_serializers/read/encoded.py
RENAMED
```diff
@@ -3,6 +3,8 @@ from __future__ import annotations
 import logging
 import sys
 
+import h5py
+
 from .... import compression as compress
 from ....types import (
     ArrayOfEncodedEqualSizedArrays,
@@ -13,6 +15,7 @@ from .array import (
     _h5_read_array,
 )
 from .scalar import _h5_read_scalar
+from .utils import read_attrs
 from .vector_of_vectors import _h5_read_vector_of_vectors
 
 log = logging.getLogger(__name__)
@@ -20,21 +23,29 @@ log = logging.getLogger(__name__)
 
 def _h5_read_array_of_encoded_equalsized_arrays(
     h5g,
+    fname,
+    oname,
     **kwargs,
 ):
-    return _h5_read_encoded_array(ArrayOfEncodedEqualSizedArrays, h5g, **kwargs)
+    return _h5_read_encoded_array(
+        ArrayOfEncodedEqualSizedArrays, h5g, fname, oname, **kwargs
+    )
 
 
 def _h5_read_vector_of_encoded_vectors(
     h5g,
+    fname,
+    oname,
     **kwargs,
 ):
-    return _h5_read_encoded_array(VectorOfEncodedVectors, h5g, **kwargs)
+    return _h5_read_encoded_array(VectorOfEncodedVectors, h5g, fname, oname, **kwargs)
 
 
 def _h5_read_encoded_array(
     lgdotype,
     h5g,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -45,11 +56,11 @@ def _h5_read_encoded_array(
 ):
     if lgdotype not in (ArrayOfEncodedEqualSizedArrays, VectorOfEncodedVectors):
         msg = f"unsupported read of encoded type {lgdotype.__name__}"
-        raise LH5DecodeError(msg,
+        raise LH5DecodeError(msg, fname, oname)
 
     if not decompress and obj_buf is not None and not isinstance(obj_buf, lgdotype):
         msg = f"object buffer is not a {lgdotype.__name__}"
-        raise LH5DecodeError(msg,
+        raise LH5DecodeError(msg, fname, oname)
 
     # read out decoded_size, either a Scalar or an Array
     decoded_size_buf = encoded_data_buf = None
@@ -58,8 +69,11 @@ def _h5_read_encoded_array(
         encoded_data_buf = obj_buf.encoded_data
 
     if lgdotype is VectorOfEncodedVectors:
+        h5o = h5py.h5o.open(h5g, b"decoded_size")
         decoded_size, _ = _h5_read_array(
-            h5g["decoded_size"],
+            h5o,
+            fname,
+            f"{oname}/decoded_size",
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -67,16 +81,23 @@ def _h5_read_encoded_array(
             obj_buf=None if decompress else decoded_size_buf,
             obj_buf_start=0 if decompress else obj_buf_start,
         )
-
+        h5o.close()
     else:
+        h5o = h5py.h5o.open(h5g, b"decoded_size")
         decoded_size, _ = _h5_read_scalar(
-            h5g["decoded_size"],
+            h5o,
+            fname,
+            f"{oname}/decoded_size",
             obj_buf=None if decompress else decoded_size_buf,
         )
+        h5o.close()
 
     # read out encoded_data, a VectorOfVectors
+    h5o = h5py.h5o.open(h5g, b"encoded_data")
     encoded_data, n_rows_read = _h5_read_vector_of_vectors(
-        h5g["encoded_data"],
+        h5o,
+        fname,
+        f"{oname}/encoded_data",
         start_row=start_row,
        n_rows=n_rows,
         idx=idx,
@@ -84,6 +105,7 @@ def _h5_read_encoded_array(
         obj_buf=None if decompress else encoded_data_buf,
         obj_buf_start=0 if decompress else obj_buf_start,
     )
+    h5o.close()
 
     # return the still encoded data in the buffer object, if there
     if obj_buf is not None and not decompress:
@@ -93,7 +115,7 @@ def _h5_read_encoded_array(
     rawdata = lgdotype(
         encoded_data=encoded_data,
         decoded_size=decoded_size,
-        attrs=
+        attrs=read_attrs(h5g, fname, oname),
     )
 
     # already return if no decompression is requested
```
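encoded.py follows the same pattern as composite.py: member objects are opened with the low-level `h5py.h5o.open` (note the bytes names) and closed explicitly, rather than going through high-level `h5g[...]` indexing, and attributes come from the shared `read_attrs` helper. A standalone sketch of that low-level access pattern (file and dataset names are made up; wrapping the returned id in `h5py.Dataset` is this sketch's choice, not necessarily what the library does internally):

```python
import h5py
import numpy as np

with h5py.File("scratch.h5", "w") as f:
    f["payload"] = np.arange(4)

with h5py.File("scratch.h5", "r") as f:
    # low-level open by bytes name, mirroring the serializers' pattern
    oid = h5py.h5o.open(f.id, b"payload")
    data = h5py.Dataset(oid)[...]  # wrap only to read the values
    oid.close()

print(data)  # [0 1 2 3]
```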
|