legend-pydataobj 1.9.0__tar.gz → 1.10.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/PKG-INFO +2 -2
  2. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/pyproject.toml +1 -1
  3. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/PKG-INFO +2 -2
  4. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/requires.txt +1 -1
  5. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/_version.py +2 -2
  6. legend_pydataobj-1.10.0/src/lgdo/lh5/_serializers/read/array.py +34 -0
  7. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/composite.py +68 -78
  8. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/encoded.py +31 -9
  9. legend_pydataobj-1.10.0/src/lgdo/lh5/_serializers/read/ndarray.py +117 -0
  10. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/scalar.py +10 -3
  11. legend_pydataobj-1.10.0/src/lgdo/lh5/_serializers/read/utils.py +35 -0
  12. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/vector_of_vectors.py +35 -13
  13. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/write/array.py +6 -1
  14. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/write/composite.py +14 -5
  15. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/write/scalar.py +6 -1
  16. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/core.py +78 -7
  17. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/exceptions.py +3 -3
  18. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/store.py +101 -11
  19. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/tools.py +1 -1
  20. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/utils.py +13 -2
  21. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/histogram.py +18 -3
  22. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/lh5/test_core.py +6 -0
  23. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/lh5/test_lh5_write.py +11 -1
  24. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_histogram.py +9 -3
  25. legend_pydataobj-1.9.0/src/lgdo/lh5/_serializers/read/array.py +0 -34
  26. legend_pydataobj-1.9.0/src/lgdo/lh5/_serializers/read/ndarray.py +0 -103
  27. legend_pydataobj-1.9.0/src/lgdo/lh5/_serializers/read/utils.py +0 -12
  28. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/LICENSE +0 -0
  29. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/README.md +0 -0
  30. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/setup.cfg +0 -0
  31. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/SOURCES.txt +0 -0
  32. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/dependency_links.txt +0 -0
  33. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/entry_points.txt +0 -0
  34. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/not-zip-safe +0 -0
  35. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/top_level.txt +0 -0
  36. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/__init__.py +0 -0
  37. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/cli.py +0 -0
  38. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/compression/__init__.py +0 -0
  39. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/compression/base.py +0 -0
  40. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/compression/generic.py +0 -0
  41. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/compression/radware.py +0 -0
  42. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/compression/utils.py +0 -0
  43. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/compression/varlen.py +0 -0
  44. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lgdo_utils.py +0 -0
  45. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/__init__.py +0 -0
  46. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/__init__.py +0 -0
  47. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/__init__.py +0 -0
  48. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/write/__init__.py +0 -0
  49. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/write/vector_of_vectors.py +0 -0
  50. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/datatype.py +0 -0
  51. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/iterator.py +0 -0
  52. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5_store.py +0 -0
  53. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/logging.py +0 -0
  54. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/__init__.py +0 -0
  55. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/array.py +0 -0
  56. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/arrayofequalsizedarrays.py +0 -0
  57. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/encoded.py +0 -0
  58. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/fixedsizearray.py +0 -0
  59. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/lgdo.py +0 -0
  60. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/scalar.py +0 -0
  61. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/struct.py +0 -0
  62. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/table.py +0 -0
  63. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/vectorofvectors.py +0 -0
  64. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/vovutils.py +0 -0
  65. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/types/waveformtable.py +0 -0
  66. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/units.py +0 -0
  67. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/utils.py +0 -0
  68. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/compression/conftest.py +0 -0
  69. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/compression/sigcompress/LDQTA_r117_20200110T105115Z_cal_geds_raw-0.dat +0 -0
  70. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/compression/sigcompress/special-wf-clipped.dat +0 -0
  71. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/compression/test_compression.py +0 -0
  72. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/compression/test_radware_sigcompress.py +0 -0
  73. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/compression/test_str2wfcodec.py +0 -0
  74. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/compression/test_uleb128_zigzag_diff.py +0 -0
  75. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/conftest.py +0 -0
  76. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/lh5/conftest.py +0 -0
  77. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/lh5/test_lh5_datatype.py +0 -0
  78. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/lh5/test_lh5_iterator.py +0 -0
  79. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/lh5/test_lh5_store.py +0 -0
  80. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/lh5/test_lh5_tools.py +0 -0
  81. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/lh5/test_lh5_utils.py +0 -0
  82. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/test_cli.py +0 -0
  83. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/test_lgdo_utils.py +0 -0
  84. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_array.py +0 -0
  85. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_arrayofequalsizedarrays.py +0 -0
  86. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_encoded.py +0 -0
  87. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_fixedsizearray.py +0 -0
  88. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_representations.py +0 -0
  89. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_scalar.py +0 -0
  90. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_struct.py +0 -0
  91. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_table.py +0 -0
  92. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_table_eval.py +0 -0
  93. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_vectorofvectors.py +0 -0
  94. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_vovutils.py +0 -0
  95. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/tests/types/test_waveformtable.py +0 -0
{legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: legend_pydataobj
-Version: 1.9.0
+Version: 1.10.0
 Summary: LEGEND Python Data Objects
 Author: The LEGEND Collaboration
 Maintainer: The LEGEND Collaboration
@@ -700,7 +700,7 @@ License-File: LICENSE
 Requires-Dist: awkward>=2
 Requires-Dist: awkward-pandas
 Requires-Dist: colorlog
-Requires-Dist: h5py>=3.2
+Requires-Dist: h5py>=3.10
 Requires-Dist: hdf5plugin
 Requires-Dist: hist
 Requires-Dist: numba!=0.53.*,!=0.54.*
{legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/pyproject.toml

@@ -34,7 +34,7 @@ dependencies = [
     "awkward>=2",
     "awkward-pandas",
     "colorlog",
-    "h5py>=3.2",
+    "h5py>=3.10",
     "hdf5plugin",
     "hist",
     "numba!=0.53.*,!=0.54.*",
{legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: legend_pydataobj
-Version: 1.9.0
+Version: 1.10.0
 Summary: LEGEND Python Data Objects
 Author: The LEGEND Collaboration
 Maintainer: The LEGEND Collaboration
@@ -700,7 +700,7 @@ License-File: LICENSE
 Requires-Dist: awkward>=2
 Requires-Dist: awkward-pandas
 Requires-Dist: colorlog
-Requires-Dist: h5py>=3.2
+Requires-Dist: h5py>=3.10
 Requires-Dist: hdf5plugin
 Requires-Dist: hist
 Requires-Dist: numba!=0.53.*,!=0.54.*
{legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/legend_pydataobj.egg-info/requires.txt

@@ -1,7 +1,7 @@
 awkward>=2
 awkward-pandas
 colorlog
-h5py>=3.2
+h5py>=3.10
 hdf5plugin
 hist
 numba!=0.53.*,!=0.54.*
{legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/_version.py

@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE

-__version__ = version = '1.9.0'
-__version_tuple__ = version_tuple = (1, 9, 0)
+__version__ = version = '1.10.0'
+__version_tuple__ = version_tuple = (1, 10, 0)
legend_pydataobj-1.10.0/src/lgdo/lh5/_serializers/read/array.py (new file)

@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+import logging
+
+from ....types import Array, ArrayOfEqualSizedArrays, FixedSizeArray
+from . import utils
+from .ndarray import _h5_read_ndarray
+
+log = logging.getLogger(__name__)
+
+
+def _h5_read_array_generic(type_, h5d, fname, oname, **kwargs):
+    nda, attrs, n_rows_to_read = _h5_read_ndarray(h5d, fname, oname, **kwargs)
+
+    obj_buf = kwargs["obj_buf"]
+
+    if obj_buf is None:
+        return type_(nda=nda, attrs=attrs), n_rows_to_read
+
+    utils.check_obj_buf_attrs(obj_buf.attrs, attrs, fname, oname)
+
+    return obj_buf, n_rows_to_read
+
+
+def _h5_read_array(h5d, fname, oname, **kwargs):
+    return _h5_read_array_generic(Array, h5d, fname, oname, **kwargs)
+
+
+def _h5_read_fixedsize_array(h5d, fname, oname, **kwargs):
+    return _h5_read_array_generic(FixedSizeArray, h5d, fname, oname, **kwargs)
+
+
+def _h5_read_array_of_equalsized_arrays(h5d, fname, oname, **kwargs):
+    return _h5_read_array_generic(ArrayOfEqualSizedArrays, h5d, fname, oname, **kwargs)
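The wrappers in this new module (like every reader touched by this release) take a low-level h5py.h5d.DatasetID plus the file and object names, which are used only for logging and error messages. Below is a minimal sketch of driving _h5_read_array directly; the file name, dataset name, and hand-written "datatype" attribute are invented for illustration, and in normal use these internal helpers are reached through the public lgdo.lh5 read entry points rather than called directly.

    # Sketch only: write a tiny LH5-style dataset, then read it back through
    # the internal array reader using h5py's low-level object API.
    import h5py
    import numpy as np

    from lgdo.lh5._serializers.read.array import _h5_read_array

    fname = "example.lh5"   # invented file name
    oname = "energy"        # invented dataset name

    with h5py.File(fname, "w") as f:
        dset = f.create_dataset(oname, data=np.arange(10, dtype="float64"))
        dset.attrs["datatype"] = "array<1>{real}"  # LH5 datatype string for a 1D real array

    with h5py.File(fname, "r") as f:
        h5d = h5py.h5o.open(f.id, oname.encode("utf-8"))  # low-level DatasetID
        obj, n_read = _h5_read_array(h5d, fname, oname, obj_buf=None)
        h5d.close()

    print(type(obj).__name__, n_read)  # expected: Array 10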
{legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/composite.py

@@ -23,7 +23,6 @@ from ....types import (
 )
 from ... import datatype as dtypeutils
 from ...exceptions import LH5DecodeError
-from ...utils import read_n_rows
 from . import utils
 from .array import (
     _h5_read_array,
@@ -42,6 +41,8 @@ log = logging.getLogger(__name__)

 def _h5_read_lgdo(
     h5o,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -51,69 +52,23 @@ def _h5_read_lgdo(
     obj_buf_start=0,
     decompress=True,
 ):
-    # Handle list-of-files recursively
-    if not isinstance(h5o, (h5py.Group, h5py.Dataset)):
-        lh5_objs = list(h5o)
-        n_rows_read = 0
-
-        for i, _h5o in enumerate(lh5_objs):
-            if isinstance(idx, list) and len(idx) > 0 and not np.isscalar(idx[0]):
-                # a list of lists: must be one per file
-                idx_i = idx[i]
-            elif idx is not None:
-                # make idx a proper tuple if it's not one already
-                if not (isinstance(idx, tuple) and len(idx) == 1):
-                    idx = (idx,)
-                # idx is a long continuous array
-                n_rows_i = read_n_rows(_h5o)
-                # find the length of the subset of idx that contains indices
-                # that are less than n_rows_i
-                n_rows_to_read_i = bisect.bisect_left(idx[0], n_rows_i)
-                # now split idx into idx_i and the remainder
-                idx_i = (idx[0][:n_rows_to_read_i],)
-                idx = (idx[0][n_rows_to_read_i:] - n_rows_i,)
-            else:
-                idx_i = None
-            n_rows_i = n_rows - n_rows_read
-
-            obj_buf, n_rows_read_i = _h5_read_lgdo(
-                _h5o,
-                start_row=start_row,
-                n_rows=n_rows_i,
-                idx=idx_i,
-                use_h5idx=use_h5idx,
-                field_mask=field_mask,
-                obj_buf=obj_buf,
-                obj_buf_start=obj_buf_start,
-                decompress=decompress,
-            )
-
-            n_rows_read += n_rows_read_i
-            if n_rows_read >= n_rows or obj_buf is None:
-                return obj_buf, n_rows_read
-            start_row = 0
-            obj_buf_start += n_rows_read_i
-
-        return obj_buf, n_rows_read
-
     log.debug(
-        f"reading {h5o.file.filename}:{h5o.name}[{start_row}:{n_rows}], decompress = {decompress}, "
+        f"reading {fname}:{oname}[{start_row}:{n_rows}], decompress = {decompress}, "
         + (f" with field mask {field_mask}" if field_mask else "")
     )

-    # make idx a proper tuple if it's not one already
-    if not (isinstance(idx, tuple) and len(idx) == 1) and idx is not None:
-        idx = (idx,)
-
+    attrs = utils.read_attrs(h5o, fname, oname)
     try:
-        lgdotype = dtypeutils.datatype(h5o.attrs["datatype"])
+        lgdotype = dtypeutils.datatype(attrs["datatype"])
     except KeyError as e:
         msg = "dataset not in file or missing 'datatype' attribute"
-        raise LH5DecodeError(msg, h5o) from e
+        raise LH5DecodeError(msg, fname, oname) from e

     if lgdotype is Scalar:
         return _h5_read_scalar(
             h5o,
+            fname,
+            oname,
             obj_buf=obj_buf,
         )

@@ -125,7 +80,7 @@ def _h5_read_lgdo(
         if len(field_mask) > 0:
             default = not field_mask[next(iter(field_mask.keys()))]
             field_mask = defaultdict(lambda: default, field_mask)
-    elif isinstance(field_mask, (list, tuple)):
+    elif isinstance(field_mask, (list, tuple, set)):
         field_mask = defaultdict(bool, {field: True for field in field_mask})
     elif not isinstance(field_mask, defaultdict):
         msg = "bad field_mask type"
@@ -134,6 +89,8 @@ def _h5_read_lgdo(
     if lgdotype is Struct:
         return _h5_read_struct(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -145,20 +102,22 @@ def _h5_read_lgdo(
     # Below here is all array-like types. So trim idx if needed
     if idx is not None:
         # check if idx is just an ordered list of the integers if so can ignore
-        if (idx[0] == np.arange(0, len(idx[0]), 1)).all():
-            if n_rows > len(idx[0]):
-                n_rows = len(idx[0])
+        if (idx == np.arange(0, len(idx), 1)).all():
+            if n_rows > len(idx):
+                n_rows = len(idx)
             idx = None
         else:
             # chop off indices < start_row
-            i_first_valid = bisect.bisect_left(idx[0], start_row)
-            idxa = idx[0][i_first_valid:]
+            i_first_valid = bisect.bisect_left(idx, start_row)
+            idxa = idx[i_first_valid:]
             # don't readout more than n_rows indices
-            idx = (idxa[:n_rows],)  # works even if n_rows > len(idxa)
+            idx = idxa[:n_rows]  # works even if n_rows > len(idxa)

     if lgdotype is Table:
         return _h5_read_table(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -172,6 +131,8 @@ def _h5_read_lgdo(
     if lgdotype is Histogram:
         return _h5_read_histogram(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -185,6 +146,8 @@ def _h5_read_lgdo(
     if lgdotype is ArrayOfEncodedEqualSizedArrays:
         return _h5_read_array_of_encoded_equalsized_arrays(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -197,6 +160,8 @@ def _h5_read_lgdo(
     if lgdotype is VectorOfEncodedVectors:
         return _h5_read_vector_of_encoded_vectors(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -209,6 +174,8 @@ def _h5_read_lgdo(
     if lgdotype is VectorOfVectors:
         return _h5_read_vector_of_vectors(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -220,6 +187,8 @@ def _h5_read_lgdo(
     if lgdotype is FixedSizeArray:
         return _h5_read_fixedsize_array(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -231,6 +200,8 @@ def _h5_read_lgdo(
     if lgdotype is ArrayOfEqualSizedArrays:
         return _h5_read_array_of_equalsized_arrays(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -242,6 +213,8 @@ def _h5_read_lgdo(
     if lgdotype is Array:
         return _h5_read_array(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -251,11 +224,13 @@ def _h5_read_lgdo(
     )

     msg = f"no rule to decode {lgdotype.__name__} from LH5"
-    raise LH5DecodeError(msg, h5o)
+    raise LH5DecodeError(msg, fname, oname)


 def _h5_read_struct(
     h5g,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -268,7 +243,7 @@ def _h5_read_struct(
     # table... Maybe should emit a warning? Or allow them to be
     # dicts keyed by field name?

-    attrs = dict(h5g.attrs)
+    attrs = utils.read_attrs(h5g, fname, oname)

     # determine fields to be read out
     all_fields = dtypeutils.get_struct_fields(attrs["datatype"])
@@ -286,20 +261,26 @@ def _h5_read_struct(
     for field in selected_fields:
         # support for integer keys
         field_key = int(field) if attrs.get("int_keys") else str(field)
+        h5o = h5py.h5o.open(h5g, field.encode("utf-8"))
         obj_dict[field_key], _ = _h5_read_lgdo(
-            h5g[field],
+            h5o,
+            fname,
+            f"{oname}/{field}",
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
             use_h5idx=use_h5idx,
             decompress=decompress,
         )
+        h5o.close()

     return Struct(obj_dict=obj_dict, attrs=attrs), 1


 def _h5_read_table(
     h5g,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -311,9 +292,9 @@ def _h5_read_table(
 ):
     if obj_buf is not None and not isinstance(obj_buf, Table):
         msg = "provided object buffer is not a Table"
-        raise LH5DecodeError(msg, h5g)
+        raise LH5DecodeError(msg, fname, oname)

-    attrs = dict(h5g.attrs)
+    attrs = utils.read_attrs(h5g, fname, oname)

     # determine fields to be read out
     all_fields = dtypeutils.get_struct_fields(attrs["datatype"])
@@ -334,12 +315,15 @@ def _h5_read_table(
         if obj_buf is not None:
             if not isinstance(obj_buf, Table) or field not in obj_buf:
                 msg = "provided object buffer is not a Table or columns are missing"
-                raise LH5DecodeError(msg, h5g)
+                raise LH5DecodeError(msg, fname, oname)

             fld_buf = obj_buf[field]

+        h5o = h5py.h5o.open(h5g, field.encode("utf-8"))
         col_dict[field], n_rows_read = _h5_read_lgdo(
-            h5g[field],
+            h5o,
+            fname,
+            f"{oname}/{field}",
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -348,6 +332,7 @@ def _h5_read_table(
             obj_buf_start=obj_buf_start,
             decompress=decompress,
         )
+        h5o.close()

         if obj_buf is not None and obj_buf_start + n_rows_read > len(obj_buf):
             obj_buf.resize(obj_buf_start + n_rows_read)
@@ -359,12 +344,12 @@ def _h5_read_table(
         n_rows_read = rows_read[0]
     else:
         n_rows_read = 0
-        log.warning(f"Table '{h5g.name}' has no fields specified by {field_mask=}")
+        log.warning(f"Table '{oname}' has no fields specified by {field_mask=}")

     for n in rows_read[1:]:
         if n != n_rows_read:
             log.warning(
-                f"Table '{h5g.name}' got strange n_rows_read = {n}, "
+                f"Table '{oname}' got strange n_rows_read = {n}, "
                 "{n_rows_read} was expected ({rows_read})"
             )

@@ -396,13 +381,15 @@ def _h5_read_table(
         obj_buf.loc = obj_buf_start + n_rows_read

         # check attributes
-        utils.check_obj_buf_attrs(obj_buf.attrs, attrs, h5g)
+        utils.check_obj_buf_attrs(obj_buf.attrs, attrs, fname, oname)

         return obj_buf, n_rows_read


 def _h5_read_histogram(
     h5g,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -414,17 +401,20 @@ def _h5_read_histogram(
 ):
     if obj_buf is not None or obj_buf_start != 0:
         msg = "reading a histogram into an existing object buffer is not supported"
-        raise LH5DecodeError(msg, h5g)
+        raise LH5DecodeError(msg, fname, oname)

     struct, n_rows_read = _h5_read_struct(
         h5g,
-        start_row,
-        n_rows,
-        idx,
-        use_h5idx,
-        field_mask,
-        decompress,
+        fname,
+        oname,
+        start_row=start_row,
+        n_rows=n_rows,
+        idx=idx,
+        use_h5idx=use_h5idx,
+        field_mask=field_mask,
+        decompress=decompress,
     )
+
     binning = []
     for _, a in struct.binning.items():
         be = a.binedges
@@ -434,7 +424,7 @@ def _h5_read_histogram(
             b = (be, None, None, None, a.closedleft.value)
         else:
             msg = "unexpected binning of histogram"
-            raise LH5DecodeError(msg, h5g)
+            raise LH5DecodeError(msg, fname, oname)
         ax = Histogram.Axis(*b)
         # copy attrs to "clone" the "whole" struct.
         ax.attrs = a.getattrs(datatype=True)
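One behavioral tweak buried in the hunks above: field_mask may now also be a set, in addition to a dict, list, or tuple. Below is a small self-contained sketch of the normalization performed in _h5_read_lgdo; the function name and the empty-dict default are local to this sketch.

    # Sketch of the field_mask normalization: dict masks keep an inferred
    # default, while a list/tuple/set (set support is new here) is treated as
    # "read only these fields".
    from collections import defaultdict

    def normalize_field_mask(field_mask):
        if isinstance(field_mask, dict):
            default = False  # assumed behavior for an empty dict
            if len(field_mask) > 0:
                default = not field_mask[next(iter(field_mask.keys()))]
            return defaultdict(lambda: default, field_mask)
        if isinstance(field_mask, (list, tuple, set)):
            return defaultdict(bool, {field: True for field in field_mask})
        msg = "bad field_mask type"
        raise ValueError(msg)

    mask = normalize_field_mask({"energy", "timestamp"})  # a set now works
    assert mask["energy"] and not mask["waveform"]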
{legend_pydataobj-1.9.0 → legend_pydataobj-1.10.0}/src/lgdo/lh5/_serializers/read/encoded.py

@@ -3,6 +3,8 @@ from __future__ import annotations
 import logging
 import sys

+import h5py
+
 from .... import compression as compress
 from ....types import (
     ArrayOfEncodedEqualSizedArrays,
@@ -13,6 +15,7 @@ from .array import (
     _h5_read_array,
 )
 from .scalar import _h5_read_scalar
+from .utils import read_attrs
 from .vector_of_vectors import _h5_read_vector_of_vectors

 log = logging.getLogger(__name__)
@@ -20,21 +23,29 @@ log = logging.getLogger(__name__)

 def _h5_read_array_of_encoded_equalsized_arrays(
     h5g,
+    fname,
+    oname,
     **kwargs,
 ):
-    return _h5_read_encoded_array(ArrayOfEncodedEqualSizedArrays, h5g, **kwargs)
+    return _h5_read_encoded_array(
+        ArrayOfEncodedEqualSizedArrays, h5g, fname, oname, **kwargs
+    )


 def _h5_read_vector_of_encoded_vectors(
     h5g,
+    fname,
+    oname,
     **kwargs,
 ):
-    return _h5_read_encoded_array(VectorOfEncodedVectors, h5g, **kwargs)
+    return _h5_read_encoded_array(VectorOfEncodedVectors, h5g, fname, oname, **kwargs)


 def _h5_read_encoded_array(
     lgdotype,
     h5g,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -45,11 +56,11 @@ def _h5_read_encoded_array(
 ):
     if lgdotype not in (ArrayOfEncodedEqualSizedArrays, VectorOfEncodedVectors):
         msg = f"unsupported read of encoded type {lgdotype.__name__}"
-        raise LH5DecodeError(msg, h5g)
+        raise LH5DecodeError(msg, fname, oname)

     if not decompress and obj_buf is not None and not isinstance(obj_buf, lgdotype):
         msg = f"object buffer is not a {lgdotype.__name__}"
-        raise LH5DecodeError(msg, h5g)
+        raise LH5DecodeError(msg, fname, oname)

     # read out decoded_size, either a Scalar or an Array
     decoded_size_buf = encoded_data_buf = None
@@ -58,8 +69,11 @@ def _h5_read_encoded_array(
         encoded_data_buf = obj_buf.encoded_data

     if lgdotype is VectorOfEncodedVectors:
+        h5o = h5py.h5o.open(h5g, b"decoded_size")
         decoded_size, _ = _h5_read_array(
-            h5g["decoded_size"],
+            h5o,
+            fname,
+            f"{oname}/decoded_size",
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -67,16 +81,23 @@ def _h5_read_encoded_array(
             obj_buf=None if decompress else decoded_size_buf,
             obj_buf_start=0 if decompress else obj_buf_start,
         )
-
+        h5o.close()
     else:
+        h5o = h5py.h5o.open(h5g, b"decoded_size")
         decoded_size, _ = _h5_read_scalar(
-            h5g["decoded_size"],
+            h5o,
+            fname,
+            f"{oname}/decoded_size",
             obj_buf=None if decompress else decoded_size_buf,
         )
+        h5o.close()

     # read out encoded_data, a VectorOfVectors
+    h5o = h5py.h5o.open(h5g, b"encoded_data")
     encoded_data, n_rows_read = _h5_read_vector_of_vectors(
-        h5g["encoded_data"],
+        h5o,
+        fname,
+        f"{oname}/encoded_data",
         start_row=start_row,
         n_rows=n_rows,
         idx=idx,
@@ -84,6 +105,7 @@ def _h5_read_encoded_array(
         obj_buf=None if decompress else encoded_data_buf,
         obj_buf_start=0 if decompress else obj_buf_start,
     )
+    h5o.close()

     # return the still encoded data in the buffer object, if there
     if obj_buf is not None and not decompress:
@@ -93,7 +115,7 @@ def _h5_read_encoded_array(
     rawdata = lgdotype(
         encoded_data=encoded_data,
         decoded_size=decoded_size,
-        attrs=dict(h5g.attrs),
+        attrs=read_attrs(h5g, fname, oname),
     )

     # already return if no decompression is requested
legend_pydataobj-1.10.0/src/lgdo/lh5/_serializers/read/ndarray.py (new file)

@@ -0,0 +1,117 @@
+from __future__ import annotations
+
+import logging
+import sys
+from bisect import bisect_left
+
+import h5py
+import numpy as np
+
+from ....types import Array
+from ... import datatype
+from ...exceptions import LH5DecodeError
+from .utils import read_attrs
+
+log = logging.getLogger(__name__)
+
+
+def _h5_read_ndarray(
+    h5d,
+    fname,
+    oname,
+    start_row=0,
+    n_rows=sys.maxsize,
+    idx=None,
+    use_h5idx=False,
+    obj_buf=None,
+    obj_buf_start=0,
+):
+    if obj_buf is not None and not isinstance(obj_buf, Array):
+        msg = "object buffer is not an Array"
+        raise LH5DecodeError(msg, fname, oname)
+
+    # compute the number of rows to read
+    # we culled idx above for start_row and n_rows, now we have to apply
+    # the constraint of the length of the dataset
+    try:
+        fspace = h5d.get_space()
+        ds_n_rows = fspace.shape[0]
+    except AttributeError as e:
+        msg = "does not seem to be an HDF5 dataset"
+        raise LH5DecodeError(msg, fname, oname) from e
+
+    if idx is not None:
+        if len(idx) > 0 and idx[-1] >= ds_n_rows:
+            log.warning("idx indexed past the end of the array in the file. Culling...")
+            n_rows_to_read = bisect_left(idx[0], ds_n_rows)
+            idx = (idx[:n_rows_to_read],)
+            if len(idx) == 0:
+                log.warning("idx empty after culling.")
+        n_rows_to_read = len(idx)
+    else:
+        n_rows_to_read = ds_n_rows - start_row
+    if n_rows_to_read > n_rows:
+        n_rows_to_read = n_rows
+
+    if idx is None:
+        fspace.select_hyperslab(
+            (start_row,) + (0,) * (h5d.rank - 1),
+            (1,) * h5d.rank,
+            None,
+            (n_rows_to_read,) + fspace.shape[1:],
+        )
+    elif use_h5idx:
+        # Note that h5s will automatically merge adjacent elements into a range
+        fspace.select_none()
+        for i in idx:
+            fspace.select_hyperslab(
+                (i,) + (0,) * (h5d.rank - 1),
+                (1,) * h5d.rank,
+                None,
+                (1,) + fspace.shape[1:],
+                h5py.h5s.SELECT_OR,
+            )
+
+    # Now read the array
+    if obj_buf is not None and n_rows_to_read > 0:
+        buf_size = obj_buf_start + n_rows_to_read
+        if len(obj_buf) < buf_size:
+            obj_buf.resize(buf_size)
+        dest_sel = np.s_[obj_buf_start:buf_size]
+
+        if idx is None or use_h5idx:
+            mspace = h5py.h5s.create_simple(obj_buf.nda.shape)
+            mspace.select_hyperslab(
+                (obj_buf_start,) + (0,) * (h5d.rank - 1),
+                (1,) * h5d.rank,
+                None,
+                (n_rows_to_read,) + fspace.shape[1:],
+            )
+            h5d.read(mspace, fspace, obj_buf.nda)
+        else:
+            tmp = np.empty(fspace.shape, h5d.dtype)
+            h5d.read(fspace, fspace, tmp)
+            obj_buf.nda[dest_sel, ...] = tmp[idx, ...]
+        nda = obj_buf.nda
+    elif n_rows == 0:
+        tmp_shape = (0,) + h5d.shape[1:]
+        nda = np.empty(tmp_shape, h5d.dtype)
+    else:
+        mspace = h5py.h5s.create_simple((n_rows_to_read,) + fspace.shape[1:])
+        nda = np.empty(mspace.shape, h5d.dtype)
+        if idx is None or use_h5idx:
+            h5d.read(mspace, fspace, nda)
+        else:
+            tmp = np.empty(fspace.shape, h5d.dtype)
+            h5d.read(fspace, fspace, tmp)
+            nda[:, ...] = tmp[idx, ...]
+
+    # Finally, set attributes and return objects
+    attrs = read_attrs(h5d, fname, oname)
+
+    # special handling for bools
+    # (c and Julia store as uint8 so cast to bool)
+    if datatype.get_nested_datatype_string(attrs["datatype"]) == "bool":
+        nda = nda.astype(np.bool_)
+
+    return (nda, attrs, n_rows_to_read)
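The new _h5_read_ndarray replaces high-level h5py.Dataset slicing with the low-level dataspace API: select a hyperslab in the file dataspace, build a matching memory space, and read straight into a NumPy buffer. Below is a self-contained sketch of that pattern on a plain HDF5 file; all names are invented for illustration.

    # Sketch of the select-hyperslab-then-read pattern used above.
    import h5py
    import numpy as np

    fname = "demo.h5"
    with h5py.File(fname, "w") as f:
        f["data"] = np.arange(100.0).reshape(25, 4)

    with h5py.File(fname, "r") as f:
        h5d = h5py.h5o.open(f.id, b"data")  # low-level DatasetID
        fspace = h5d.get_space()

        start_row, n_rows = 5, 10
        # select rows [start_row, start_row + n_rows) and all trailing dimensions
        fspace.select_hyperslab(
            (start_row,) + (0,) * (h5d.rank - 1),
            (1,) * h5d.rank,
            None,
            (n_rows,) + fspace.shape[1:],
        )

        buf = np.empty((n_rows,) + fspace.shape[1:], h5d.dtype)
        mspace = h5py.h5s.create_simple(buf.shape)
        h5d.read(mspace, fspace, buf)  # fills buf with rows 5..14
        h5d.close()

    print(buf[0])  # first selected row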