legend-pydataobj 1.7.1__tar.gz → 1.8.0__tar.gz

This diff shows the content of publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
Files changed (91)
  1. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/PKG-INFO +1 -1
  2. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/legend_pydataobj.egg-info/PKG-INFO +1 -1
  3. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/_version.py +2 -2
  4. legend_pydataobj-1.8.0/src/lgdo/lh5/_serializers/read/array.py +34 -0
  5. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/_serializers/read/composite.py +30 -48
  6. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/_serializers/read/encoded.py +11 -17
  7. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/_serializers/read/ndarray.py +11 -12
  8. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/_serializers/read/scalar.py +7 -7
  9. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/_serializers/read/utils.py +3 -3
  10. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/_serializers/read/vector_of_vectors.py +10 -14
  11. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/core.py +13 -2
  12. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/exceptions.py +3 -3
  13. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/store.py +3 -4
  14. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/utils.py +1 -1
  15. legend_pydataobj-1.7.1/src/lgdo/lh5/_serializers/read/array.py +0 -34
  16. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/LICENSE +0 -0
  17. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/README.md +0 -0
  18. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/pyproject.toml +0 -0
  19. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/setup.cfg +0 -0
  20. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/legend_pydataobj.egg-info/SOURCES.txt +0 -0
  21. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/legend_pydataobj.egg-info/dependency_links.txt +0 -0
  22. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/legend_pydataobj.egg-info/entry_points.txt +0 -0
  23. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/legend_pydataobj.egg-info/not-zip-safe +0 -0
  24. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/legend_pydataobj.egg-info/requires.txt +0 -0
  25. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/legend_pydataobj.egg-info/top_level.txt +0 -0
  26. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/__init__.py +0 -0
  27. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/cli.py +0 -0
  28. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/compression/__init__.py +0 -0
  29. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/compression/base.py +0 -0
  30. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/compression/generic.py +0 -0
  31. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/compression/radware.py +0 -0
  32. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/compression/utils.py +0 -0
  33. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/compression/varlen.py +0 -0
  34. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lgdo_utils.py +0 -0
  35. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/__init__.py +0 -0
  36. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/_serializers/__init__.py +0 -0
  37. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/_serializers/read/__init__.py +0 -0
  38. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/_serializers/write/__init__.py +0 -0
  39. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/_serializers/write/array.py +0 -0
  40. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/_serializers/write/composite.py +0 -0
  41. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/_serializers/write/scalar.py +0 -0
  42. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/_serializers/write/vector_of_vectors.py +0 -0
  43. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/datatype.py +0 -0
  44. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/iterator.py +0 -0
  45. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/tools.py +0 -0
  46. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5_store.py +0 -0
  47. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/logging.py +0 -0
  48. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/types/__init__.py +0 -0
  49. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/types/array.py +0 -0
  50. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/types/arrayofequalsizedarrays.py +0 -0
  51. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/types/encoded.py +0 -0
  52. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/types/fixedsizearray.py +0 -0
  53. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/types/lgdo.py +0 -0
  54. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/types/scalar.py +0 -0
  55. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/types/struct.py +0 -0
  56. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/types/table.py +0 -0
  57. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/types/vectorofvectors.py +0 -0
  58. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/types/vovutils.py +0 -0
  59. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/types/waveformtable.py +0 -0
  60. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/units.py +0 -0
  61. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/utils.py +0 -0
  62. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/compression/conftest.py +0 -0
  63. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/compression/sigcompress/LDQTA_r117_20200110T105115Z_cal_geds_raw-0.dat +0 -0
  64. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/compression/sigcompress/special-wf-clipped.dat +0 -0
  65. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/compression/test_compression.py +0 -0
  66. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/compression/test_radware_sigcompress.py +0 -0
  67. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/compression/test_str2wfcodec.py +0 -0
  68. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/compression/test_uleb128_zigzag_diff.py +0 -0
  69. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/conftest.py +0 -0
  70. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/lh5/conftest.py +0 -0
  71. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/lh5/test_core.py +0 -0
  72. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/lh5/test_lh5_datatype.py +0 -0
  73. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/lh5/test_lh5_iterator.py +0 -0
  74. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/lh5/test_lh5_store.py +0 -0
  75. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/lh5/test_lh5_tools.py +0 -0
  76. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/lh5/test_lh5_utils.py +0 -0
  77. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/lh5/test_lh5_write.py +0 -0
  78. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/test_cli.py +0 -0
  79. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/test_lgdo_utils.py +0 -0
  80. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/types/test_array.py +0 -0
  81. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/types/test_arrayofequalsizedarrays.py +0 -0
  82. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/types/test_encoded.py +0 -0
  83. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/types/test_fixedsizearray.py +0 -0
  84. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/types/test_representations.py +0 -0
  85. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/types/test_scalar.py +0 -0
  86. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/types/test_struct.py +0 -0
  87. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/types/test_table.py +0 -0
  88. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/types/test_table_eval.py +0 -0
  89. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/types/test_vectorofvectors.py +0 -0
  90. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/types/test_vovutils.py +0 -0
  91. {legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/tests/types/test_waveformtable.py +0 -0
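The substantive change in 1.8.0 is a refactor of the private LH5 read path: the `_h5_read_*` serializers now take an open h5py object (named `h5o`, `h5g`, or `h5d` depending on whether a generic object, a group, or a dataset is expected) instead of a `(name, h5f)` pair, and `LH5DecodeError` is constructed from that object. In sketch form (the HDF5 path is hypothetical; these are private functions):

    # 1.7.1: private serializers received a name plus an open file
    #     obj, n = _h5_read_array("geds/raw/energy", h5f, obj_buf=None)
    # 1.8.0: they receive the h5py object itself
    #     obj, n = _h5_read_array(h5f["geds/raw/energy"], obj_buf=None)

Nearly every hunk below follows mechanically from this one signature change.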
{legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: legend_pydataobj
- Version: 1.7.1
+ Version: 1.8.0
  Summary: LEGEND Python Data Objects
  Author: The LEGEND Collaboration
  Maintainer: The LEGEND Collaboration
{legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/legend_pydataobj.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: legend_pydataobj
- Version: 1.7.1
+ Version: 1.8.0
  Summary: LEGEND Python Data Objects
  Author: The LEGEND Collaboration
  Maintainer: The LEGEND Collaboration
{legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/_version.py
@@ -12,5 +12,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '1.7.1'
- __version_tuple__ = version_tuple = (1, 7, 1)
+ __version__ = version = '1.8.0'
+ __version_tuple__ = version_tuple = (1, 8, 0)
legend_pydataobj-1.8.0/src/lgdo/lh5/_serializers/read/array.py (new file)
@@ -0,0 +1,34 @@
+ from __future__ import annotations
+
+ import logging
+
+ from ....types import Array, ArrayOfEqualSizedArrays, FixedSizeArray
+ from . import utils
+ from .ndarray import _h5_read_ndarray
+
+ log = logging.getLogger(__name__)
+
+
+ def _h5_read_array_generic(type_, h5d, **kwargs):
+     nda, attrs, n_rows_to_read = _h5_read_ndarray(h5d, **kwargs)
+
+     obj_buf = kwargs["obj_buf"]
+
+     if obj_buf is None:
+         return type_(nda=nda, attrs=attrs), n_rows_to_read
+
+     utils.check_obj_buf_attrs(obj_buf.attrs, attrs, h5d)
+
+     return obj_buf, n_rows_to_read
+
+
+ def _h5_read_array(h5d, **kwargs):
+     return _h5_read_array_generic(Array, h5d, **kwargs)
+
+
+ def _h5_read_fixedsize_array(h5d, **kwargs):
+     return _h5_read_array_generic(FixedSizeArray, h5d, **kwargs)
+
+
+ def _h5_read_array_of_equalsized_arrays(h5d, **kwargs):
+     return _h5_read_array_generic(ArrayOfEqualSizedArrays, h5d, **kwargs)
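The module body matches the 1.7.1 file removed at the end of this diff; only the handle argument changed. A minimal sketch of calling the new entry point (private API; the file and dataset names are hypothetical, and obj_buf must be passed explicitly because _h5_read_array_generic looks it up in kwargs):

    import h5py

    from lgdo.lh5._serializers.read.array import _h5_read_array

    with h5py.File("data.lh5", "r") as f:  # hypothetical file
        arr, n_read = _h5_read_array(f["energy"], obj_buf=None)  # hypothetical dataset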
{legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/_serializers/read/composite.py
@@ -40,8 +40,7 @@ log = logging.getLogger(__name__)


  def _h5_read_lgdo(
-     name,
-     h5f,
+     h5o,
      start_row=0,
      n_rows=sys.maxsize,
      idx=None,
@@ -52,11 +51,11 @@ def _h5_read_lgdo(
      decompress=True,
  ):
      # Handle list-of-files recursively
-     if not isinstance(h5f, (str, h5py.File)):
-         lh5_file = list(h5f)
+     if not isinstance(h5o, (h5py.Group, h5py.Dataset)):
+         lh5_objs = list(h5o)
          n_rows_read = 0

-         for i, _h5f in enumerate(lh5_file):
+         for i, _h5o in enumerate(lh5_objs):
              if isinstance(idx, list) and len(idx) > 0 and not np.isscalar(idx[0]):
                  # a list of lists: must be one per file
                  idx_i = idx[i]
@@ -65,7 +64,7 @@ def _h5_read_lgdo(
              if not (isinstance(idx, tuple) and len(idx) == 1):
                  idx = (idx,)
              # idx is a long continuous array
-             n_rows_i = read_n_rows(name, _h5f)
+             n_rows_i = read_n_rows(_h5o)
              # find the length of the subset of idx that contains indices
              # that are less than n_rows_i
              n_rows_to_read_i = bisect.bisect_left(idx[0], n_rows_i)
@@ -77,8 +76,7 @@ def _h5_read_lgdo(
                  n_rows_i = n_rows - n_rows_read

              obj_buf, n_rows_read_i = _h5_read_lgdo(
-                 name,
-                 _h5f,
+                 _h5o,
                  start_row=start_row,
                  n_rows=n_rows_i,
                  idx=idx_i,
@@ -97,11 +95,8 @@ def _h5_read_lgdo(

          return obj_buf, n_rows_read

-     if not isinstance(h5f, h5py.File):
-         h5f = h5py.File(h5f, mode="r")
-
      log.debug(
-         f"reading {h5f.filename}:{name}[{start_row}:{n_rows}], decompress = {decompress}, "
+         f"reading {h5o.file.filename}:{h5o.name}[{start_row}:{n_rows}], decompress = {decompress}, "
          + (f" with field mask {field_mask}" if field_mask else "")
      )

@@ -110,15 +105,14 @@ def _h5_read_lgdo(
          idx = (idx,)

      try:
-         lgdotype = dtypeutils.datatype(h5f[name].attrs["datatype"])
+         lgdotype = dtypeutils.datatype(h5o.attrs["datatype"])
      except KeyError as e:
          msg = "dataset not in file or missing 'datatype' attribute"
-         raise LH5DecodeError(msg, h5f, name) from e
+         raise LH5DecodeError(msg, h5o) from e

      if lgdotype is Scalar:
          return _h5_read_scalar(
-             name,
-             h5f,
+             h5o,
              obj_buf=obj_buf,
          )

@@ -138,8 +132,7 @@ def _h5_read_lgdo(

      if lgdotype is Struct:
          return _h5_read_struct(
-             name,
-             h5f,
+             h5o,
              start_row=start_row,
              n_rows=n_rows,
              idx=idx,
@@ -164,8 +157,7 @@ def _h5_read_lgdo(

      if lgdotype is Table:
          return _h5_read_table(
-             name,
-             h5f,
+             h5o,
              start_row=start_row,
              n_rows=n_rows,
              idx=idx,
@@ -178,8 +170,7 @@ def _h5_read_lgdo(

      if lgdotype is ArrayOfEncodedEqualSizedArrays:
          return _h5_read_array_of_encoded_equalsized_arrays(
-             name,
-             h5f,
+             h5o,
              start_row=start_row,
              n_rows=n_rows,
              idx=idx,
@@ -191,8 +182,7 @@ def _h5_read_lgdo(

      if lgdotype is VectorOfEncodedVectors:
          return _h5_read_vector_of_encoded_vectors(
-             name,
-             h5f,
+             h5o,
              start_row=start_row,
              n_rows=n_rows,
              idx=idx,
@@ -204,8 +194,7 @@ def _h5_read_lgdo(

      if lgdotype is VectorOfVectors:
          return _h5_read_vector_of_vectors(
-             name,
-             h5f,
+             h5o,
              start_row=start_row,
              n_rows=n_rows,
              idx=idx,
@@ -216,8 +205,7 @@ def _h5_read_lgdo(

      if lgdotype is FixedSizeArray:
          return _h5_read_fixedsize_array(
-             name,
-             h5f,
+             h5o,
              start_row=start_row,
              n_rows=n_rows,
              idx=idx,
@@ -228,8 +216,7 @@ def _h5_read_lgdo(

      if lgdotype is ArrayOfEqualSizedArrays:
          return _h5_read_array_of_equalsized_arrays(
-             name,
-             h5f,
+             h5o,
              start_row=start_row,
              n_rows=n_rows,
              idx=idx,
@@ -240,8 +227,7 @@ def _h5_read_lgdo(

      if lgdotype is Array:
          return _h5_read_array(
-             name,
-             h5f,
+             h5o,
              start_row=start_row,
              n_rows=n_rows,
              idx=idx,
@@ -251,12 +237,11 @@ def _h5_read_lgdo(
      )

      msg = f"no rule to decode {lgdotype.__name__} from LH5"
-     raise LH5DecodeError(msg, h5f, name)
+     raise LH5DecodeError(msg, h5o)


  def _h5_read_struct(
-     name,
-     h5f,
+     h5g,
      start_row=0,
      n_rows=sys.maxsize,
      idx=None,
@@ -269,7 +254,7 @@ def _h5_read_struct(
      # table... Maybe should emit a warning? Or allow them to be
      # dicts keyed by field name?

-     attrs = dict(h5f[name].attrs)
+     attrs = dict(h5g.attrs)

      # determine fields to be read out
      all_fields = dtypeutils.get_struct_fields(attrs["datatype"])
@@ -288,8 +273,7 @@ def _h5_read_struct(
          # support for integer keys
          field_key = int(field) if attrs.get("int_keys") else str(field)
          obj_dict[field_key], _ = _h5_read_lgdo(
-             f"{name}/{field}",
-             h5f,
+             h5g[field],
              start_row=start_row,
              n_rows=n_rows,
              idx=idx,
@@ -301,8 +285,7 @@ def _h5_read_struct(


  def _h5_read_table(
-     name,
-     h5f,
+     h5g,
      start_row=0,
      n_rows=sys.maxsize,
      idx=None,
@@ -314,9 +297,9 @@ def _h5_read_table(
  ):
      if obj_buf is not None and not isinstance(obj_buf, Table):
          msg = "provided object buffer is not a Table"
-         raise LH5DecodeError(msg, h5f, name)
+         raise LH5DecodeError(msg, h5g)

-     attrs = dict(h5f[name].attrs)
+     attrs = dict(h5g.attrs)

      # determine fields to be read out
      all_fields = dtypeutils.get_struct_fields(attrs["datatype"])
@@ -337,13 +320,12 @@ def _h5_read_table(
          if obj_buf is not None:
              if not isinstance(obj_buf, Table) or field not in obj_buf:
                  msg = "provided object buffer is not a Table or columns are missing"
-                 raise LH5DecodeError(msg, h5f, name)
+                 raise LH5DecodeError(msg, h5g)

              fld_buf = obj_buf[field]

          col_dict[field], n_rows_read = _h5_read_lgdo(
-             f"{name}/{field}",
-             h5f,
+             h5g[field],
              start_row=start_row,
              n_rows=n_rows,
              idx=idx,
@@ -363,12 +345,12 @@ def _h5_read_table(
          n_rows_read = rows_read[0]
      else:
          n_rows_read = 0
-         log.warning(f"Table '{name}' has no fields specified by {field_mask=}")
+         log.warning(f"Table '{h5g.name}' has no fields specified by {field_mask=}")

      for n in rows_read[1:]:
          if n != n_rows_read:
              log.warning(
-                 f"Table '{name}' got strange n_rows_read = {n}, "
+                 f"Table '{h5g.name}' got strange n_rows_read = {n}, "
                  "{n_rows_read} was expected ({rows_read})"
              )

@@ -400,6 +382,6 @@ def _h5_read_table(
          obj_buf.loc = obj_buf_start + n_rows_read

      # check attributes
-     utils.check_obj_buf_attrs(obj_buf.attrs, attrs, h5f, name)
+     utils.check_obj_buf_attrs(obj_buf.attrs, attrs, h5g)

      return obj_buf, n_rows_read
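As the first hunk shows, `_h5_read_lgdo` still handles the multi-file case: it now accepts a list of h5py objects (one per file) and recurses over them, concatenating into a single buffer. A minimal sketch of a multi-file read through the private API, assuming the keyword defaults visible in the hunks above (file and object names are hypothetical):

    import h5py

    from lgdo.lh5._serializers.read.composite import _h5_read_lgdo

    objs = [h5py.File(f, "r")["geds/raw"] for f in ("run0.lh5", "run1.lh5")]
    obj, n_read = _h5_read_lgdo(objs)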
{legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/_serializers/read/encoded.py
@@ -19,25 +19,22 @@ log = logging.getLogger(__name__)


  def _h5_read_array_of_encoded_equalsized_arrays(
-     name,
-     h5f,
+     h5g,
      **kwargs,
  ):
-     return _h5_read_encoded_array(ArrayOfEncodedEqualSizedArrays, name, h5f, **kwargs)
+     return _h5_read_encoded_array(ArrayOfEncodedEqualSizedArrays, h5g, **kwargs)


  def _h5_read_vector_of_encoded_vectors(
-     name,
-     h5f,
+     h5g,
      **kwargs,
  ):
-     return _h5_read_encoded_array(VectorOfEncodedVectors, name, h5f, **kwargs)
+     return _h5_read_encoded_array(VectorOfEncodedVectors, h5g, **kwargs)


  def _h5_read_encoded_array(
      lgdotype,
-     name,
-     h5f,
+     h5g,
      start_row=0,
      n_rows=sys.maxsize,
      idx=None,
@@ -48,11 +45,11 @@ def _h5_read_encoded_array(
  ):
      if lgdotype not in (ArrayOfEncodedEqualSizedArrays, VectorOfEncodedVectors):
          msg = f"unsupported read of encoded type {lgdotype.__name__}"
-         raise LH5DecodeError(msg, h5f, name)
+         raise LH5DecodeError(msg, h5g)

      if not decompress and obj_buf is not None and not isinstance(obj_buf, lgdotype):
          msg = f"object buffer is not a {lgdotype.__name__}"
-         raise LH5DecodeError(msg, h5f, name)
+         raise LH5DecodeError(msg, h5g)

      # read out decoded_size, either a Scalar or an Array
      decoded_size_buf = encoded_data_buf = None
@@ -62,8 +59,7 @@ def _h5_read_encoded_array(

      if lgdotype is VectorOfEncodedVectors:
          decoded_size, _ = _h5_read_array(
-             f"{name}/decoded_size",
-             h5f,
+             h5g["decoded_size"],
              start_row=start_row,
              n_rows=n_rows,
              idx=idx,
@@ -74,15 +70,13 @@ def _h5_read_encoded_array(

      else:
          decoded_size, _ = _h5_read_scalar(
-             f"{name}/decoded_size",
-             h5f,
+             h5g["decoded_size"],
              obj_buf=None if decompress else decoded_size_buf,
          )

      # read out encoded_data, a VectorOfVectors
      encoded_data, n_rows_read = _h5_read_vector_of_vectors(
-         f"{name}/encoded_data",
-         h5f,
+         h5g["encoded_data"],
          start_row=start_row,
          n_rows=n_rows,
          idx=idx,
@@ -99,7 +93,7 @@ def _h5_read_encoded_array(
      rawdata = lgdotype(
          encoded_data=encoded_data,
          decoded_size=decoded_size,
-         attrs=h5f[name].attrs,
+         attrs=dict(h5g.attrs),
      )

      # already return if no decompression is requested
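Both encoded types are stored as an HDF5 group with two members, which this module reads and reassembles; when decompress=True the payload is decoded afterwards. The on-disk layout, as implied by the reads above:

    # <obj>/decoded_size   -> Scalar (ArrayOfEncodedEqualSizedArrays)
    #                         or Array (VectorOfEncodedVectors)
    # <obj>/encoded_data   -> VectorOfVectors holding the encoded payload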
{legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/_serializers/read/ndarray.py
@@ -14,8 +14,7 @@ log = logging.getLogger(__name__)


  def _h5_read_ndarray(
-     name,
-     h5f,
+     h5d,
      start_row=0,
      n_rows=sys.maxsize,
      idx=None,
@@ -25,16 +24,16 @@ def _h5_read_ndarray(
  ):
      if obj_buf is not None and not isinstance(obj_buf, Array):
          msg = "object buffer is not an Array"
-         raise LH5DecodeError(msg, h5f, name)
+         raise LH5DecodeError(msg, h5d)

      # compute the number of rows to read
      # we culled idx above for start_row and n_rows, now we have to apply
      # the constraint of the length of the dataset
      try:
-         ds_n_rows = h5f[name].shape[0]
+         ds_n_rows = h5d.shape[0]
      except AttributeError as e:
          msg = "does not seem to be an HDF5 dataset"
-         raise LH5DecodeError(msg, h5f, name) from e
+         raise LH5DecodeError(msg, h5d) from e

      if idx is not None:
          if len(idx[0]) > 0 and idx[0][-1] >= ds_n_rows:
@@ -78,23 +77,23 @@ def _h5_read_ndarray(
          # this is required to make the read of multiple files faster
          # until a better solution found.
          if change_idx_to_slice or idx is None or use_h5idx:
-             h5f[name].read_direct(obj_buf.nda, source_sel, dest_sel)
+             h5d.read_direct(obj_buf.nda, source_sel, dest_sel)
          else:
              # it is faster to read the whole object and then do fancy indexing
-             obj_buf.nda[dest_sel] = h5f[name][...][source_sel]
+             obj_buf.nda[dest_sel] = h5d[...][source_sel]

          nda = obj_buf.nda
      elif n_rows == 0:
-         tmp_shape = (0,) + h5f[name].shape[1:]
-         nda = np.empty(tmp_shape, h5f[name].dtype)
+         tmp_shape = (0,) + h5d.shape[1:]
+         nda = np.empty(tmp_shape, h5d.dtype)
      elif change_idx_to_slice or idx is None or use_h5idx:
-         nda = h5f[name][source_sel]
+         nda = h5d[source_sel]
      else:
          # it is faster to read the whole object and then do fancy indexing
-         nda = h5f[name][...][source_sel]
+         nda = h5d[...][source_sel]

      # Finally, set attributes and return objects
-     attrs = h5f[name].attrs
+     attrs = dict(h5d.attrs)

      # special handling for bools
      # (c and Julia store as uint8 so cast to bool)
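A side effect worth noting in the last hunk: `attrs` is now copied into a plain dict rather than kept as a live `h5py.AttributeManager`, so the attributes on the returned LGDO stay readable after the underlying file is closed. A small illustration of the difference (file and dataset names are hypothetical):

    import h5py

    with h5py.File("data.lh5", "r") as f:
        live = f["energy"].attrs          # tied to the open file
        copy = dict(f["energy"].attrs)    # independent snapshot

    copy.get("datatype")    # still fine after the with-block
    # accessing `live` here would raise, since its parent file is closed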
{legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/_serializers/read/scalar.py
@@ -11,24 +11,24 @@ log = logging.getLogger(__name__)


  def _h5_read_scalar(
-     name,
-     h5f,
+     h5d,
      obj_buf=None,
  ):
-     value = h5f[name][()]
+     value = h5d[()]
+     attrs = dict(h5d.attrs)

      # special handling for bools
      # (c and Julia store as uint8 so cast to bool)
-     if h5f[name].attrs["datatype"] == "bool":
+     if attrs["datatype"] == "bool":
          value = np.bool_(value)

      if obj_buf is not None:
          if not isinstance(obj_buf, Scalar):
              msg = "object buffer a Scalar"
-             raise LH5DecodeError(msg, h5f, name)
+             raise LH5DecodeError(msg, h5d)

          obj_buf.value = value
-         obj_buf.attrs.update(h5f[name].attrs)
+         obj_buf.attrs.update(attrs)
          return obj_buf, 1

-     return Scalar(value=value, attrs=h5f[name].attrs), 1
+     return Scalar(value=value, attrs=attrs), 1
{legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/_serializers/read/utils.py
@@ -3,10 +3,10 @@ from __future__ import annotations

  from ...exceptions import LH5DecodeError


- def check_obj_buf_attrs(attrs, new_attrs, file, name):
+ def check_obj_buf_attrs(attrs, new_attrs, obj):
      if set(attrs.keys()) != set(new_attrs.keys()):
          msg = (
              f"existing buffer and new data chunk have different attributes: "
-             f"obj_buf.attrs={attrs} != {file.filename}[{name}].attrs={new_attrs}"
+             f"obj_buf.attrs={attrs} != {obj.file.filename}[{obj.name}].attrs={new_attrs}"
          )
-         raise LH5DecodeError(msg, file, name)
+         raise LH5DecodeError(msg, obj)
{legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/_serializers/read/vector_of_vectors.py
@@ -20,8 +20,7 @@ log = logging.getLogger(__name__)


  def _h5_read_vector_of_vectors(
-     name,
-     h5f,
+     h5g,
      start_row=0,
      n_rows=sys.maxsize,
      idx=None,
@@ -31,13 +30,12 @@ def _h5_read_vector_of_vectors(
  ):
      if obj_buf is not None and not isinstance(obj_buf, VectorOfVectors):
          msg = "object buffer is not a VectorOfVectors"
-         raise LH5DecodeError(msg, h5f, name)
+         raise LH5DecodeError(msg, h5g)

      # read out cumulative_length
      cumulen_buf = None if obj_buf is None else obj_buf.cumulative_length
      cumulative_length, n_rows_read = _h5_read_array(
-         f"{name}/cumulative_length",
-         h5f,
+         h5g["cumulative_length"],
          start_row=start_row,
          n_rows=n_rows,
          idx=idx,
@@ -63,8 +61,7 @@ def _h5_read_vector_of_vectors(
          fd_start = 0  # this variable avoids an ndarray append

          fd_starts, fds_n_rows_read = _h5_read_array(
-             f"{name}/cumulative_length",
-             h5f,
+             h5g["cumulative_length"],
              start_row=start_row,
              n_rows=n_rows,
              idx=idx2,
@@ -101,7 +98,7 @@ def _h5_read_vector_of_vectors(
          # need to read out the cumulen sample -before- the first sample
          # read above in order to get the starting row of the first
          # vector to read out in flattened_data
-         fd_start = h5f[f"{name}/cumulative_length"][start_row - 1]
+         fd_start = h5g["cumulative_length"][start_row - 1]

      # check limits for values that will be used subsequently
      if this_cumulen_nda[-1] < fd_start:
@@ -115,7 +112,7 @@ def _h5_read_vector_of_vectors(
              f"cumulative_length non-increasing between entries "
              f"{start_row} and {start_row+n_rows_read}"
          )
-         raise LH5DecodeError(msg, h5f, name)
+         raise LH5DecodeError(msg, h5g)

      # determine the number of rows for the flattened_data readout
      fd_n_rows = this_cumulen_nda[-1] if n_rows_read > 0 else 0
@@ -147,18 +144,17 @@ def _h5_read_vector_of_vectors(
          fd_buf.resize(fdb_size)

      # now read
-     lgdotype = dtypeutils.datatype(h5f[f"{name}/flattened_data"].attrs["datatype"])
+     lgdotype = dtypeutils.datatype(h5g["flattened_data"].attrs["datatype"])
      if lgdotype is Array:
          _func = _h5_read_array
      elif lgdotype is VectorOfVectors:
          _func = _h5_read_vector_of_vectors
      else:
          msg = "type {lgdotype.__name__} is not supported"
-         raise LH5DecodeError(msg, h5f, f"{name}/flattened_data")
+         raise LH5DecodeError(msg, h5g, "flattened_data")

      flattened_data, _ = _func(
-         f"{name}/flattened_data",
-         h5f,
+         h5g["flattened_data"],
          start_row=fd_start,
          n_rows=fd_n_rows,
          idx=fd_idx,
@@ -180,7 +176,7 @@ def _h5_read_vector_of_vectors(
          VectorOfVectors(
              flattened_data=flattened_data,
              cumulative_length=cumulative_length,
-             attrs=h5f[name].attrs,
+             attrs=dict(h5g.attrs),
          ),
          n_rows_read,
      )
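For reference, a VectorOfVectors is stored as two datasets, `cumulative_length` and `flattened_data`; the code above uses the cumulative lengths to locate the slice of `flattened_data` belonging to each requested row. A toy illustration of the layout:

    import numpy as np

    # three vectors [1, 2], [3], [4, 5, 6], stored flattened
    flattened_data = np.array([1, 2, 3, 4, 5, 6])
    cumulative_length = np.array([2, 3, 6])

    # vector i spans flattened_data[start:stop]
    for i, stop in enumerate(cumulative_length):
        start = 0 if i == 0 else cumulative_length[i - 1]
        print(i, flattened_data[start:stop])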
{legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/core.py
@@ -107,9 +107,20 @@ def read(
      `n_rows_read` will be ``1``. For tables it is redundant with
      ``table.loc``. If `obj_buf` is ``None``, only `object` is returned.
      """
+     if isinstance(lh5_file, h5py.File):
+         lh5_obj = lh5_file[name]
+     elif isinstance(lh5_file, str):
+         lh5_file = h5py.File(lh5_file, mode="r")
+         lh5_obj = lh5_file[name]
+     else:
+         lh5_obj = []
+         for h5f in lh5_file:
+             if isinstance(lh5_file, str):
+                 h5f = h5py.File(h5f, mode="r")  # noqa: PLW2901
+             lh5_obj += h5f[name]
+
      obj, n_rows_read = _serializers._h5_read_lgdo(
-         name,
-         lh5_file,
+         lh5_obj,
          start_row=start_row,
          n_rows=n_rows,
          idx=idx,
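The public `read()` signature is unchanged; only the resolution of `lh5_file` to an h5py object moved up front, as the added branch shows. Typical calls still look like this (file and object names are hypothetical):

    import h5py

    from lgdo import lh5

    obj = lh5.read("geds/raw", "data.lh5")    # path: the file is opened here

    with h5py.File("data.lh5", "r") as f:     # or an already-open file
        obj = lh5.read("geds/raw", f)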
{legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/exceptions.py
@@ -4,11 +4,11 @@ import h5py


  class LH5DecodeError(Exception):
-     def __init__(self, message: str, file: str, obj: str) -> None:
+     def __init__(self, message: str, obj: h5py.Dataset | h5py.Group) -> None:
          super().__init__(message)

-         self.file = file.filename if isinstance(file, h5py.File) else file
-         self.obj = obj
+         self.file = obj.file.filename
+         self.obj = obj.name

      def __str__(self) -> str:
          return (
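Callers that catch `LH5DecodeError` still see `file` and `obj` attributes; they are now derived from the h5py handle itself. A sketch (file and object names are hypothetical):

    from lgdo import lh5
    from lgdo.lh5.exceptions import LH5DecodeError

    try:
        # raised e.g. when the object's 'datatype' attribute is missing
        obj = lh5.read("some/object", "data.lh5")
    except LH5DecodeError as e:
        print(e.file, e.obj)  # source file name and HDF5 object path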
{legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/store.py
@@ -144,13 +144,12 @@ class LH5Store:
          """
          # grab files from store
          if not isinstance(lh5_file, (str, h5py.File)):
-             lh5_file = [self.gimme_file(f, "r") for f in list(lh5_file)]
+             lh5_obj = [self.gimme_file(f, "r")[name] for f in list(lh5_file)]
          else:
-             lh5_file = self.gimme_file(lh5_file, "r")
+             lh5_obj = self.gimme_file(lh5_file, "r")[name]

          return _serializers._h5_read_lgdo(
-             name,
-             lh5_file,
+             lh5_obj,
              start_row=start_row,
              n_rows=n_rows,
              idx=idx,
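`LH5Store.read` likewise keeps its public signature: `gimme_file` resolves paths through the store's file cache and the named object is looked up before the serializer call. For example (file and object names are hypothetical):

    from lgdo.lh5 import LH5Store

    store = LH5Store()
    obj, n_rows = store.read("geds/raw", "data.lh5")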
{legend_pydataobj-1.7.1 → legend_pydataobj-1.8.0}/src/lgdo/lh5/utils.py
@@ -29,7 +29,7 @@ def get_buffer(
      Sets size to `size` if object has a size.
      """
      obj, n_rows = _serializers._h5_read_lgdo(
-         name, lh5_file, n_rows=0, field_mask=field_mask
+         lh5_file[name], n_rows=0, field_mask=field_mask
      )

      if hasattr(obj, "resize") and size is not None:
legend_pydataobj-1.7.1/src/lgdo/lh5/_serializers/read/array.py (deleted)
@@ -1,34 +0,0 @@
- from __future__ import annotations
-
- import logging
-
- from ....types import Array, ArrayOfEqualSizedArrays, FixedSizeArray
- from . import utils
- from .ndarray import _h5_read_ndarray
-
- log = logging.getLogger(__name__)
-
-
- def _h5_read_array_generic(type_, name, h5f, **kwargs):
-     nda, attrs, n_rows_to_read = _h5_read_ndarray(name, h5f, **kwargs)
-
-     obj_buf = kwargs["obj_buf"]
-
-     if obj_buf is None:
-         return type_(nda=nda, attrs=attrs), n_rows_to_read
-
-     utils.check_obj_buf_attrs(obj_buf.attrs, attrs, h5f, name)
-
-     return obj_buf, n_rows_to_read
-
-
- def _h5_read_array(name, h5f, **kwargs):
-     return _h5_read_array_generic(Array, name, h5f, **kwargs)
-
-
- def _h5_read_fixedsize_array(name, h5f, **kwargs):
-     return _h5_read_array_generic(FixedSizeArray, name, h5f, **kwargs)
-
-
- def _h5_read_array_of_equalsized_arrays(name, h5f, **kwargs):
-     return _h5_read_array_generic(ArrayOfEqualSizedArrays, name, h5f, **kwargs)