legend-pydataobj 1.8.1__tar.gz → 1.10.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/PKG-INFO +3 -2
  2. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/pyproject.toml +2 -1
  3. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/PKG-INFO +3 -2
  4. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/SOURCES.txt +2 -0
  5. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/requires.txt +2 -1
  6. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/__init__.py +4 -0
  7. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/_version.py +2 -2
  8. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/__init__.py +2 -0
  9. legend_pydataobj-1.10.0/src/lgdo/lh5/_serializers/read/array.py +34 -0
  10. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/composite.py +122 -70
  11. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/encoded.py +31 -9
  12. legend_pydataobj-1.10.0/src/lgdo/lh5/_serializers/read/ndarray.py +117 -0
  13. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/scalar.py +10 -3
  14. legend_pydataobj-1.10.0/src/lgdo/lh5/_serializers/read/utils.py +35 -0
  15. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/vector_of_vectors.py +35 -13
  16. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/write/array.py +6 -1
  17. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/write/composite.py +20 -4
  18. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/write/scalar.py +6 -1
  19. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/core.py +78 -7
  20. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/datatype.py +1 -0
  21. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/exceptions.py +3 -3
  22. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/store.py +101 -11
  23. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/tools.py +1 -1
  24. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/utils.py +13 -2
  25. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5_store.py +1 -0
  26. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/types/__init__.py +2 -0
  27. legend_pydataobj-1.10.0/src/lgdo/types/histogram.py +419 -0
  28. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/types/table.py +1 -1
  29. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/conftest.py +1 -1
  30. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/lh5/test_core.py +6 -0
  31. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/lh5/test_lh5_datatype.py +1 -0
  32. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/lh5/test_lh5_write.py +133 -0
  33. legend_pydataobj-1.10.0/tests/types/test_histogram.py +298 -0
  34. legend_pydataobj-1.8.1/src/lgdo/lh5/_serializers/read/array.py +0 -34
  35. legend_pydataobj-1.8.1/src/lgdo/lh5/_serializers/read/ndarray.py +0 -103
  36. legend_pydataobj-1.8.1/src/lgdo/lh5/_serializers/read/utils.py +0 -12
  37. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/LICENSE +0 -0
  38. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/README.md +0 -0
  39. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/setup.cfg +0 -0
  40. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/dependency_links.txt +0 -0
  41. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/entry_points.txt +0 -0
  42. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/not-zip-safe +0 -0
  43. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/top_level.txt +0 -0
  44. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/cli.py +0 -0
  45. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/compression/__init__.py +0 -0
  46. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/compression/base.py +0 -0
  47. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/compression/generic.py +0 -0
  48. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/compression/radware.py +0 -0
  49. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/compression/utils.py +0 -0
  50. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/compression/varlen.py +0 -0
  51. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lgdo_utils.py +0 -0
  52. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/__init__.py +0 -0
  53. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/__init__.py +0 -0
  54. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/write/__init__.py +0 -0
  55. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/write/vector_of_vectors.py +0 -0
  56. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/lh5/iterator.py +0 -0
  57. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/logging.py +0 -0
  58. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/types/array.py +0 -0
  59. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/types/arrayofequalsizedarrays.py +0 -0
  60. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/types/encoded.py +0 -0
  61. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/types/fixedsizearray.py +0 -0
  62. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/types/lgdo.py +0 -0
  63. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/types/scalar.py +0 -0
  64. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/types/struct.py +0 -0
  65. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/types/vectorofvectors.py +0 -0
  66. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/types/vovutils.py +0 -0
  67. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/types/waveformtable.py +0 -0
  68. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/units.py +0 -0
  69. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/src/lgdo/utils.py +0 -0
  70. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/compression/conftest.py +0 -0
  71. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/compression/sigcompress/LDQTA_r117_20200110T105115Z_cal_geds_raw-0.dat +0 -0
  72. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/compression/sigcompress/special-wf-clipped.dat +0 -0
  73. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/compression/test_compression.py +0 -0
  74. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/compression/test_radware_sigcompress.py +0 -0
  75. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/compression/test_str2wfcodec.py +0 -0
  76. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/compression/test_uleb128_zigzag_diff.py +0 -0
  77. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/lh5/conftest.py +0 -0
  78. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/lh5/test_lh5_iterator.py +0 -0
  79. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/lh5/test_lh5_store.py +0 -0
  80. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/lh5/test_lh5_tools.py +0 -0
  81. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/lh5/test_lh5_utils.py +0 -0
  82. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/test_cli.py +0 -0
  83. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/test_lgdo_utils.py +0 -0
  84. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/types/test_array.py +0 -0
  85. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/types/test_arrayofequalsizedarrays.py +0 -0
  86. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/types/test_encoded.py +0 -0
  87. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/types/test_fixedsizearray.py +0 -0
  88. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/types/test_representations.py +0 -0
  89. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/types/test_scalar.py +0 -0
  90. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/types/test_struct.py +0 -0
  91. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/types/test_table.py +0 -0
  92. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/types/test_table_eval.py +0 -0
  93. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/types/test_vectorofvectors.py +0 -0
  94. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/types/test_vovutils.py +0 -0
  95. {legend_pydataobj-1.8.1 → legend_pydataobj-1.10.0}/tests/types/test_waveformtable.py +0 -0
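
At a glance: 1.10.0 adds a new Histogram LGDO type (src/lgdo/types/histogram.py, tested in tests/types/test_histogram.py), raises the minimum h5py from 3.2 to 3.10, adds the hist package as a dependency, and reworks the low-level LH5 read serializers to take explicit file and object names (fname, oname) plus h5py low-level object handles in place of high-level Group/Dataset objects. The multi-file recursion also disappears from _h5_read_lgdo; the growth of src/lgdo/lh5/store.py (+101) suggests that logic now lives higher up the stack.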
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: legend_pydataobj
-Version: 1.8.1
+Version: 1.10.0
 Summary: LEGEND Python Data Objects
 Author: The LEGEND Collaboration
 Maintainer: The LEGEND Collaboration
@@ -700,8 +700,9 @@ License-File: LICENSE
 Requires-Dist: awkward>=2
 Requires-Dist: awkward-pandas
 Requires-Dist: colorlog
-Requires-Dist: h5py>=3.2
+Requires-Dist: h5py>=3.10
 Requires-Dist: hdf5plugin
+Requires-Dist: hist
 Requires-Dist: numba!=0.53.*,!=0.54.*
 Requires-Dist: numexpr
 Requires-Dist: numpy>=1.21
pyproject.toml
@@ -34,8 +34,9 @@ dependencies = [
     "awkward>=2",
     "awkward-pandas",
     "colorlog",
-    "h5py>=3.2",
+    "h5py>=3.10",
     "hdf5plugin",
+    "hist",
     "numba!=0.53.*,!=0.54.*",
     "numexpr",
     "numpy>=1.21",
src/legend_pydataobj.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: legend_pydataobj
-Version: 1.8.1
+Version: 1.10.0
 Summary: LEGEND Python Data Objects
 Author: The LEGEND Collaboration
 Maintainer: The LEGEND Collaboration
@@ -700,8 +700,9 @@ License-File: LICENSE
 Requires-Dist: awkward>=2
 Requires-Dist: awkward-pandas
 Requires-Dist: colorlog
-Requires-Dist: h5py>=3.2
+Requires-Dist: h5py>=3.10
 Requires-Dist: hdf5plugin
+Requires-Dist: hist
 Requires-Dist: numba!=0.53.*,!=0.54.*
 Requires-Dist: numexpr
 Requires-Dist: numpy>=1.21
src/legend_pydataobj.egg-info/SOURCES.txt
@@ -49,6 +49,7 @@ src/lgdo/types/array.py
 src/lgdo/types/arrayofequalsizedarrays.py
 src/lgdo/types/encoded.py
 src/lgdo/types/fixedsizearray.py
+src/lgdo/types/histogram.py
 src/lgdo/types/lgdo.py
 src/lgdo/types/scalar.py
 src/lgdo/types/struct.py
@@ -78,6 +79,7 @@ tests/types/test_array.py
 tests/types/test_arrayofequalsizedarrays.py
 tests/types/test_encoded.py
 tests/types/test_fixedsizearray.py
+tests/types/test_histogram.py
 tests/types/test_representations.py
 tests/types/test_scalar.py
 tests/types/test_struct.py
src/legend_pydataobj.egg-info/requires.txt
@@ -1,8 +1,9 @@
 awkward>=2
 awkward-pandas
 colorlog
-h5py>=3.2
+h5py>=3.10
 hdf5plugin
+hist
 numba!=0.53.*,!=0.54.*
 numexpr
 numpy>=1.21
src/lgdo/__init__.py
@@ -33,6 +33,8 @@ basic data object classes are:
   :class:`dict`
 * :class:`.Table`: a :class:`.Struct` whose elements ("columns") are all array
   types with the same length (number of rows)
+* :class:`.Histogram`: holds an array of histogrammed data, and the associated
+  binning of arbitrary dimensionality.

 Currently the primary on-disk format for LGDO object is LEGEND HDF5 (LH5) files. IO
 is done via the class :class:`.lh5_store.LH5Store`. LH5 files can also be
@@ -50,6 +52,7 @@ from .types import (
     ArrayOfEncodedEqualSizedArrays,
     ArrayOfEqualSizedArrays,
     FixedSizeArray,
+    Histogram,
     Scalar,
     Struct,
     Table,
@@ -63,6 +66,7 @@ __all__ = [
     "ArrayOfEqualSizedArrays",
     "ArrayOfEncodedEqualSizedArrays",
     "FixedSizeArray",
+    "Histogram",
     "LGDO",
     "Scalar",
     "Struct",
src/lgdo/_version.py
@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE

-__version__ = version = '1.8.1'
-__version_tuple__ = version_tuple = (1, 8, 1)
+__version__ = version = '1.10.0'
+__version_tuple__ = version_tuple = (1, 10, 0)
src/lgdo/lh5/_serializers/__init__.py
@@ -7,6 +7,7 @@ from .read.array import (
     _h5_read_ndarray,
 )
 from .read.composite import (
+    _h5_read_histogram,
     _h5_read_lgdo,
     _h5_read_struct,
     _h5_read_table,
@@ -32,6 +33,7 @@ __all__ = [
     "_h5_read_array_of_equalsized_arrays",
     "_h5_read_struct",
     "_h5_read_table",
+    "_h5_read_histogram",
     "_h5_read_scalar",
     "_h5_read_array_of_encoded_equalsized_arrays",
     "_h5_read_vector_of_encoded_vectors",
src/lgdo/lh5/_serializers/read/array.py (new file)
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+import logging
+
+from ....types import Array, ArrayOfEqualSizedArrays, FixedSizeArray
+from . import utils
+from .ndarray import _h5_read_ndarray
+
+log = logging.getLogger(__name__)
+
+
+def _h5_read_array_generic(type_, h5d, fname, oname, **kwargs):
+    nda, attrs, n_rows_to_read = _h5_read_ndarray(h5d, fname, oname, **kwargs)
+
+    obj_buf = kwargs["obj_buf"]
+
+    if obj_buf is None:
+        return type_(nda=nda, attrs=attrs), n_rows_to_read
+
+    utils.check_obj_buf_attrs(obj_buf.attrs, attrs, fname, oname)
+
+    return obj_buf, n_rows_to_read
+
+
+def _h5_read_array(h5d, fname, oname, **kwargs):
+    return _h5_read_array_generic(Array, h5d, fname, oname, **kwargs)
+
+
+def _h5_read_fixedsize_array(h5d, fname, oname, **kwargs):
+    return _h5_read_array_generic(FixedSizeArray, h5d, fname, oname, **kwargs)
+
+
+def _h5_read_array_of_equalsized_arrays(h5d, fname, oname, **kwargs):
+    return _h5_read_array_generic(ArrayOfEqualSizedArrays, h5d, fname, oname, **kwargs)
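
The new read/array.py funnels Array, FixedSizeArray and ArrayOfEqualSizedArrays through one generic reader: without a buffer it wraps the freshly read ndarray in the requested LGDO class; with a buffer, _h5_read_ndarray has already filled it in place, so only the attributes are cross-checked. A hedged usage sketch of that buffer contract through the public store API (the dataset name is hypothetical, and the (object, n_rows) return shape when a buffer is supplied is an assumption carried over from earlier releases):

    import numpy as np
    import lgdo
    from lgdo import lh5

    store = lh5.LH5Store()
    buf = lgdo.Array(np.zeros(1000))  # preallocated read buffer
    # hypothetical dataset "tbl/energy" in data.lh5
    obj, n_rows = store.read("tbl/energy", "data.lh5", obj_buf=buf)
    # obj is buf itself; its attrs were checked against the on-disk attributes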
src/lgdo/lh5/_serializers/read/composite.py
@@ -13,6 +13,7 @@ from ....types import (
     ArrayOfEncodedEqualSizedArrays,
     ArrayOfEqualSizedArrays,
     FixedSizeArray,
+    Histogram,
     Scalar,
     Struct,
     Table,
@@ -22,7 +23,6 @@ from ....types import (
 )
 from ... import datatype as dtypeutils
 from ...exceptions import LH5DecodeError
-from ...utils import read_n_rows
 from . import utils
 from .array import (
     _h5_read_array,
@@ -41,6 +41,8 @@ log = logging.getLogger(__name__)

 def _h5_read_lgdo(
     h5o,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -50,69 +52,23 @@ def _h5_read_lgdo(
     obj_buf_start=0,
     decompress=True,
 ):
-    # Handle list-of-files recursively
-    if not isinstance(h5o, (h5py.Group, h5py.Dataset)):
-        lh5_objs = list(h5o)
-        n_rows_read = 0
-
-        for i, _h5o in enumerate(lh5_objs):
-            if isinstance(idx, list) and len(idx) > 0 and not np.isscalar(idx[0]):
-                # a list of lists: must be one per file
-                idx_i = idx[i]
-            elif idx is not None:
-                # make idx a proper tuple if it's not one already
-                if not (isinstance(idx, tuple) and len(idx) == 1):
-                    idx = (idx,)
-                # idx is a long continuous array
-                n_rows_i = read_n_rows(_h5o)
-                # find the length of the subset of idx that contains indices
-                # that are less than n_rows_i
-                n_rows_to_read_i = bisect.bisect_left(idx[0], n_rows_i)
-                # now split idx into idx_i and the remainder
-                idx_i = (idx[0][:n_rows_to_read_i],)
-                idx = (idx[0][n_rows_to_read_i:] - n_rows_i,)
-            else:
-                idx_i = None
-                n_rows_i = n_rows - n_rows_read
-
-            obj_buf, n_rows_read_i = _h5_read_lgdo(
-                _h5o,
-                start_row=start_row,
-                n_rows=n_rows_i,
-                idx=idx_i,
-                use_h5idx=use_h5idx,
-                field_mask=field_mask,
-                obj_buf=obj_buf,
-                obj_buf_start=obj_buf_start,
-                decompress=decompress,
-            )
-
-            n_rows_read += n_rows_read_i
-            if n_rows_read >= n_rows or obj_buf is None:
-                return obj_buf, n_rows_read
-            start_row = 0
-            obj_buf_start += n_rows_read_i
-
-        return obj_buf, n_rows_read
-
     log.debug(
-        f"reading {h5o.file.filename}:{h5o.name}[{start_row}:{n_rows}], decompress = {decompress}, "
+        f"reading {fname}:{oname}[{start_row}:{n_rows}], decompress = {decompress}, "
         + (f" with field mask {field_mask}" if field_mask else "")
     )

-    # make idx a proper tuple if it's not one already
-    if not (isinstance(idx, tuple) and len(idx) == 1) and idx is not None:
-        idx = (idx,)
-
+    attrs = utils.read_attrs(h5o, fname, oname)
     try:
-        lgdotype = dtypeutils.datatype(h5o.attrs["datatype"])
+        lgdotype = dtypeutils.datatype(attrs["datatype"])
     except KeyError as e:
         msg = "dataset not in file or missing 'datatype' attribute"
-        raise LH5DecodeError(msg, h5o) from e
+        raise LH5DecodeError(msg, fname, oname) from e

     if lgdotype is Scalar:
         return _h5_read_scalar(
             h5o,
+            fname,
+            oname,
             obj_buf=obj_buf,
         )

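The whole list-of-files branch is gone from _h5_read_lgdo: the serializer now handles exactly one h5py object, identified by fname and oname. For reference, the index-splitting trick the deleted block used, restated as a standalone Python sketch (the function name is mine):

    import bisect
    import numpy as np

    def split_idx_per_file(idx, n_rows_per_file):
        """Split a sorted global row index across files, shifting each
        chunk into that file's local row numbering."""
        out = []
        idx = np.asarray(idx)
        for n in n_rows_per_file:
            k = bisect.bisect_left(idx, n)  # entries < n live in this file
            out.append(idx[:k])
            idx = idx[k:] - n  # re-base the remainder for the next file
        return out

    split_idx_per_file([0, 5, 12, 17], [10, 10])  # [array([0, 5]), array([2, 7])]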
@@ -124,7 +80,7 @@ def _h5_read_lgdo(
         if len(field_mask) > 0:
             default = not field_mask[next(iter(field_mask.keys()))]
         field_mask = defaultdict(lambda: default, field_mask)
-    elif isinstance(field_mask, (list, tuple)):
+    elif isinstance(field_mask, (list, tuple, set)):
         field_mask = defaultdict(bool, {field: True for field in field_mask})
     elif not isinstance(field_mask, defaultdict):
         msg = "bad field_mask type"
133
89
  if lgdotype is Struct:
134
90
  return _h5_read_struct(
135
91
  h5o,
92
+ fname,
93
+ oname,
136
94
  start_row=start_row,
137
95
  n_rows=n_rows,
138
96
  idx=idx,
@@ -144,20 +102,37 @@ def _h5_read_lgdo(
144
102
  # Below here is all array-like types. So trim idx if needed
145
103
  if idx is not None:
146
104
  # check if idx is just an ordered list of the integers if so can ignore
147
- if (idx[0] == np.arange(0, len(idx[0]), 1)).all():
148
- if n_rows > len(idx[0]):
149
- n_rows = len(idx[0])
105
+ if (idx == np.arange(0, len(idx), 1)).all():
106
+ if n_rows > len(idx):
107
+ n_rows = len(idx)
150
108
  idx = None
151
109
  else:
152
110
  # chop off indices < start_row
153
- i_first_valid = bisect.bisect_left(idx[0], start_row)
154
- idxa = idx[0][i_first_valid:]
111
+ i_first_valid = bisect.bisect_left(idx, start_row)
112
+ idxa = idx[i_first_valid:]
155
113
  # don't readout more than n_rows indices
156
- idx = (idxa[:n_rows],) # works even if n_rows > len(idxa)
114
+ idx = idxa[:n_rows] # works even if n_rows > len(idxa)
157
115
 
158
116
  if lgdotype is Table:
159
117
  return _h5_read_table(
160
118
  h5o,
119
+ fname,
120
+ oname,
121
+ start_row=start_row,
122
+ n_rows=n_rows,
123
+ idx=idx,
124
+ use_h5idx=use_h5idx,
125
+ field_mask=field_mask,
126
+ obj_buf=obj_buf,
127
+ obj_buf_start=obj_buf_start,
128
+ decompress=decompress,
129
+ )
130
+
131
+ if lgdotype is Histogram:
132
+ return _h5_read_histogram(
133
+ h5o,
134
+ fname,
135
+ oname,
161
136
  start_row=start_row,
162
137
  n_rows=n_rows,
163
138
  idx=idx,
@@ -171,6 +146,8 @@ def _h5_read_lgdo(
     if lgdotype is ArrayOfEncodedEqualSizedArrays:
         return _h5_read_array_of_encoded_equalsized_arrays(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -183,6 +160,8 @@ def _h5_read_lgdo(
     if lgdotype is VectorOfEncodedVectors:
         return _h5_read_vector_of_encoded_vectors(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -195,6 +174,8 @@ def _h5_read_lgdo(
     if lgdotype is VectorOfVectors:
         return _h5_read_vector_of_vectors(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -206,6 +187,8 @@ def _h5_read_lgdo(
     if lgdotype is FixedSizeArray:
         return _h5_read_fixedsize_array(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -217,6 +200,8 @@ def _h5_read_lgdo(
     if lgdotype is ArrayOfEqualSizedArrays:
         return _h5_read_array_of_equalsized_arrays(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -228,6 +213,8 @@ def _h5_read_lgdo(
     if lgdotype is Array:
         return _h5_read_array(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -237,11 +224,13 @@ def _h5_read_lgdo(
     )

     msg = f"no rule to decode {lgdotype.__name__} from LH5"
-    raise LH5DecodeError(msg, h5o)
+    raise LH5DecodeError(msg, fname, oname)


 def _h5_read_struct(
     h5g,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -254,7 +243,7 @@ def _h5_read_struct(
     # table... Maybe should emit a warning? Or allow them to be
     # dicts keyed by field name?

-    attrs = dict(h5g.attrs)
+    attrs = utils.read_attrs(h5g, fname, oname)

     # determine fields to be read out
     all_fields = dtypeutils.get_struct_fields(attrs["datatype"])
@@ -272,20 +261,26 @@ def _h5_read_struct(
     for field in selected_fields:
         # support for integer keys
         field_key = int(field) if attrs.get("int_keys") else str(field)
+        h5o = h5py.h5o.open(h5g, field.encode("utf-8"))
         obj_dict[field_key], _ = _h5_read_lgdo(
-            h5g[field],
+            h5o,
+            fname,
+            f"{oname}/{field}",
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
             use_h5idx=use_h5idx,
             decompress=decompress,
         )
+        h5o.close()

     return Struct(obj_dict=obj_dict, attrs=attrs), 1


 def _h5_read_table(
     h5g,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
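
Struct and Table members are now opened through h5py's low-level API (h5py.h5o.open on a group identifier, returning a raw ObjectID) instead of the high-level h5g[field] indexing, and each handle is closed explicitly after the read; this avoids constructing a high-level Group/Dataset wrapper per member and may be related to the new h5py>=3.10 floor. A minimal sketch of the pattern (file and member names are hypothetical):

    import h5py

    with h5py.File("data.lh5", "r") as f:
        h5g = f["tbl"].id  # low-level GroupID behind the high-level Group
        h5o = h5py.h5o.open(h5g, b"energy")  # raw ObjectID, no wrapper object
        try:
            pass  # hand h5o to a low-level reader here
        finally:
            h5o.close()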
@@ -297,9 +292,9 @@ def _h5_read_table(
 ):
     if obj_buf is not None and not isinstance(obj_buf, Table):
         msg = "provided object buffer is not a Table"
-        raise LH5DecodeError(msg, h5g)
+        raise LH5DecodeError(msg, fname, oname)

-    attrs = dict(h5g.attrs)
+    attrs = utils.read_attrs(h5g, fname, oname)

     # determine fields to be read out
     all_fields = dtypeutils.get_struct_fields(attrs["datatype"])
@@ -320,12 +315,15 @@ def _h5_read_table(
         if obj_buf is not None:
             if not isinstance(obj_buf, Table) or field not in obj_buf:
                 msg = "provided object buffer is not a Table or columns are missing"
-                raise LH5DecodeError(msg, h5g)
+                raise LH5DecodeError(msg, fname, oname)

             fld_buf = obj_buf[field]

+        h5o = h5py.h5o.open(h5g, field.encode("utf-8"))
         col_dict[field], n_rows_read = _h5_read_lgdo(
-            h5g[field],
+            h5o,
+            fname,
+            f"{oname}/{field}",
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -334,6 +332,7 @@ def _h5_read_table(
             obj_buf_start=obj_buf_start,
             decompress=decompress,
         )
+        h5o.close()

         if obj_buf is not None and obj_buf_start + n_rows_read > len(obj_buf):
             obj_buf.resize(obj_buf_start + n_rows_read)
@@ -345,12 +344,12 @@ def _h5_read_table(
         n_rows_read = rows_read[0]
     else:
         n_rows_read = 0
-        log.warning(f"Table '{h5g.name}' has no fields specified by {field_mask=}")
+        log.warning(f"Table '{oname}' has no fields specified by {field_mask=}")

     for n in rows_read[1:]:
         if n != n_rows_read:
             log.warning(
-                f"Table '{h5g.name}' got strange n_rows_read = {n}, "
+                f"Table '{oname}' got strange n_rows_read = {n}, "
                 "{n_rows_read} was expected ({rows_read})"
             )
@@ -382,6 +381,59 @@ def _h5_read_table(
         obj_buf.loc = obj_buf_start + n_rows_read

     # check attributes
-    utils.check_obj_buf_attrs(obj_buf.attrs, attrs, h5g)
+    utils.check_obj_buf_attrs(obj_buf.attrs, attrs, fname, oname)

     return obj_buf, n_rows_read
+
+
+def _h5_read_histogram(
+    h5g,
+    fname,
+    oname,
+    start_row=0,
+    n_rows=sys.maxsize,
+    idx=None,
+    use_h5idx=False,
+    field_mask=None,
+    obj_buf=None,
+    obj_buf_start=0,
+    decompress=True,
+):
+    if obj_buf is not None or obj_buf_start != 0:
+        msg = "reading a histogram into an existing object buffer is not supported"
+        raise LH5DecodeError(msg, fname, oname)
+
+    struct, n_rows_read = _h5_read_struct(
+        h5g,
+        fname,
+        oname,
+        start_row=start_row,
+        n_rows=n_rows,
+        idx=idx,
+        use_h5idx=use_h5idx,
+        field_mask=field_mask,
+        decompress=decompress,
+    )
+
+    binning = []
+    for _, a in struct.binning.items():
+        be = a.binedges
+        if isinstance(be, Struct):
+            b = (None, be.first.value, be.last.value, be.step.value, a.closedleft.value)
+        elif isinstance(be, Array):
+            b = (be, None, None, None, a.closedleft.value)
+        else:
+            msg = "unexpected binning of histogram"
+            raise LH5DecodeError(msg, fname, oname)
+        ax = Histogram.Axis(*b)
+        # copy attrs to "clone" the "whole" struct.
+        ax.attrs = a.getattrs(datatype=True)
+        ax["binedges"].attrs = be.getattrs(datatype=True)
+        binning.append(ax)
+
+    isdensity = struct.isdensity.value
+    weights = struct.weights
+    attrs = struct.getattrs(datatype=True)
+    histogram = Histogram(weights, binning, isdensity, attrs=attrs)
+
+    return histogram, n_rows_read
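
With this dispatch in place, histograms round-trip through LH5 like any other LGDO. A hedged sketch using the lgdo.lh5 read/write convenience layer (the wo_mode value and the acceptance of a bare edge array as binning are assumptions about the public API, not shown in this diff):

    import numpy as np
    import lgdo
    from lgdo import lh5

    weights, edges = np.histogram(np.random.normal(size=1000), bins=20)
    h = lgdo.Histogram(weights, (edges,))  # assumes edge arrays are accepted

    lh5.write(h, "hist", "hists.lh5", wo_mode="overwrite_file")
    h2 = lh5.read("hist", "hists.lh5")  # dispatches to _h5_read_histogram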
src/lgdo/lh5/_serializers/read/encoded.py
@@ -3,6 +3,8 @@ from __future__ import annotations
 import logging
 import sys

+import h5py
+
 from .... import compression as compress
 from ....types import (
     ArrayOfEncodedEqualSizedArrays,
@@ -13,6 +15,7 @@ from .array import (
     _h5_read_array,
 )
 from .scalar import _h5_read_scalar
+from .utils import read_attrs
 from .vector_of_vectors import _h5_read_vector_of_vectors

 log = logging.getLogger(__name__)
@@ -20,21 +23,29 @@ log = logging.getLogger(__name__)

 def _h5_read_array_of_encoded_equalsized_arrays(
     h5g,
+    fname,
+    oname,
     **kwargs,
 ):
-    return _h5_read_encoded_array(ArrayOfEncodedEqualSizedArrays, h5g, **kwargs)
+    return _h5_read_encoded_array(
+        ArrayOfEncodedEqualSizedArrays, h5g, fname, oname, **kwargs
+    )


 def _h5_read_vector_of_encoded_vectors(
     h5g,
+    fname,
+    oname,
     **kwargs,
 ):
-    return _h5_read_encoded_array(VectorOfEncodedVectors, h5g, **kwargs)
+    return _h5_read_encoded_array(VectorOfEncodedVectors, h5g, fname, oname, **kwargs)


 def _h5_read_encoded_array(
     lgdotype,
     h5g,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
45
56
  ):
46
57
  if lgdotype not in (ArrayOfEncodedEqualSizedArrays, VectorOfEncodedVectors):
47
58
  msg = f"unsupported read of encoded type {lgdotype.__name__}"
48
- raise LH5DecodeError(msg, h5g)
59
+ raise LH5DecodeError(msg, fname, oname)
49
60
 
50
61
  if not decompress and obj_buf is not None and not isinstance(obj_buf, lgdotype):
51
62
  msg = f"object buffer is not a {lgdotype.__name__}"
52
- raise LH5DecodeError(msg, h5g)
63
+ raise LH5DecodeError(msg, fname, oname)
53
64
 
54
65
  # read out decoded_size, either a Scalar or an Array
55
66
  decoded_size_buf = encoded_data_buf = None
@@ -58,8 +69,11 @@ def _h5_read_encoded_array(
         encoded_data_buf = obj_buf.encoded_data

     if lgdotype is VectorOfEncodedVectors:
+        h5o = h5py.h5o.open(h5g, b"decoded_size")
         decoded_size, _ = _h5_read_array(
-            h5g["decoded_size"],
+            h5o,
+            fname,
+            f"{oname}/decoded_size",
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -67,16 +81,23 @@ def _h5_read_encoded_array(
             obj_buf=None if decompress else decoded_size_buf,
             obj_buf_start=0 if decompress else obj_buf_start,
         )
-
+        h5o.close()
     else:
+        h5o = h5py.h5o.open(h5g, b"decoded_size")
         decoded_size, _ = _h5_read_scalar(
-            h5g["decoded_size"],
+            h5o,
+            fname,
+            f"{oname}/decoded_size",
             obj_buf=None if decompress else decoded_size_buf,
         )
+        h5o.close()

     # read out encoded_data, a VectorOfVectors
+    h5o = h5py.h5o.open(h5g, b"encoded_data")
     encoded_data, n_rows_read = _h5_read_vector_of_vectors(
-        h5g["encoded_data"],
+        h5o,
+        fname,
+        f"{oname}/encoded_data",
         start_row=start_row,
         n_rows=n_rows,
         idx=idx,
@@ -84,6 +105,7 @@ def _h5_read_encoded_array(
         obj_buf=None if decompress else encoded_data_buf,
         obj_buf_start=0 if decompress else obj_buf_start,
     )
+    h5o.close()

     # return the still encoded data in the buffer object, if there
     if obj_buf is not None and not decompress:
@@ -93,7 +115,7 @@ def _h5_read_encoded_array(
     rawdata = lgdotype(
         encoded_data=encoded_data,
         decoded_size=decoded_size,
-        attrs=dict(h5g.attrs),
+        attrs=read_attrs(h5g, fname, oname),
     )

     # already return if no decompression is requested