legend-pydataobj 1.9.0__tar.gz → 1.10.1__tar.gz

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
Files changed (96)
  1. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/PKG-INFO +2 -2
  2. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/pyproject.toml +3 -1
  3. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/legend_pydataobj.egg-info/PKG-INFO +2 -2
  4. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/legend_pydataobj.egg-info/requires.txt +1 -1
  5. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/_version.py +2 -2
  6. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/compression/radware.py +8 -16
  7. legend_pydataobj-1.10.1/src/lgdo/lh5/_serializers/read/array.py +34 -0
  8. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/_serializers/read/composite.py +67 -78
  9. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/_serializers/read/encoded.py +31 -9
  10. legend_pydataobj-1.10.1/src/lgdo/lh5/_serializers/read/ndarray.py +116 -0
  11. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/_serializers/read/scalar.py +10 -3
  12. legend_pydataobj-1.10.1/src/lgdo/lh5/_serializers/read/utils.py +174 -0
  13. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/_serializers/read/vector_of_vectors.py +36 -14
  14. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/_serializers/write/array.py +6 -1
  15. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/_serializers/write/composite.py +14 -5
  16. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/_serializers/write/scalar.py +6 -1
  17. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/core.py +81 -7
  18. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/exceptions.py +3 -3
  19. legend_pydataobj-1.10.1/src/lgdo/lh5/iterator.py +498 -0
  20. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/store.py +116 -12
  21. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/tools.py +1 -1
  22. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/utils.py +29 -44
  23. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/histogram.py +122 -6
  24. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/table.py +2 -2
  25. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/vectorofvectors.py +1 -1
  26. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/compression/conftest.py +1 -1
  27. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/lh5/test_core.py +6 -0
  28. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/lh5/test_lh5_iterator.py +83 -2
  29. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/lh5/test_lh5_store.py +87 -0
  30. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/lh5/test_lh5_write.py +12 -2
  31. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_histogram.py +106 -4
  32. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_vectorofvectors.py +1 -1
  33. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_vovutils.py +1 -1
  34. legend_pydataobj-1.9.0/src/lgdo/lh5/_serializers/read/array.py +0 -34
  35. legend_pydataobj-1.9.0/src/lgdo/lh5/_serializers/read/ndarray.py +0 -103
  36. legend_pydataobj-1.9.0/src/lgdo/lh5/_serializers/read/utils.py +0 -12
  37. legend_pydataobj-1.9.0/src/lgdo/lh5/iterator.py +0 -314
  38. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/LICENSE +0 -0
  39. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/README.md +0 -0
  40. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/setup.cfg +0 -0
  41. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/legend_pydataobj.egg-info/SOURCES.txt +0 -0
  42. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/legend_pydataobj.egg-info/dependency_links.txt +0 -0
  43. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/legend_pydataobj.egg-info/entry_points.txt +0 -0
  44. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/legend_pydataobj.egg-info/not-zip-safe +0 -0
  45. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/legend_pydataobj.egg-info/top_level.txt +0 -0
  46. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/__init__.py +0 -0
  47. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/cli.py +0 -0
  48. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/compression/__init__.py +0 -0
  49. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/compression/base.py +0 -0
  50. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/compression/generic.py +0 -0
  51. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/compression/utils.py +0 -0
  52. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/compression/varlen.py +0 -0
  53. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lgdo_utils.py +0 -0
  54. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/__init__.py +0 -0
  55. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/_serializers/__init__.py +0 -0
  56. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/_serializers/read/__init__.py +0 -0
  57. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/_serializers/write/__init__.py +0 -0
  58. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/_serializers/write/vector_of_vectors.py +0 -0
  59. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5/datatype.py +0 -0
  60. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/lh5_store.py +0 -0
  61. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/logging.py +0 -0
  62. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/__init__.py +0 -0
  63. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/array.py +0 -0
  64. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/arrayofequalsizedarrays.py +0 -0
  65. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/encoded.py +0 -0
  66. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/fixedsizearray.py +0 -0
  67. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/lgdo.py +0 -0
  68. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/scalar.py +0 -0
  69. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/struct.py +0 -0
  70. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/vovutils.py +0 -0
  71. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/types/waveformtable.py +0 -0
  72. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/units.py +0 -0
  73. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/src/lgdo/utils.py +0 -0
  74. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/compression/sigcompress/LDQTA_r117_20200110T105115Z_cal_geds_raw-0.dat +0 -0
  75. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/compression/sigcompress/special-wf-clipped.dat +0 -0
  76. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/compression/test_compression.py +0 -0
  77. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/compression/test_radware_sigcompress.py +0 -0
  78. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/compression/test_str2wfcodec.py +0 -0
  79. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/compression/test_uleb128_zigzag_diff.py +0 -0
  80. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/conftest.py +0 -0
  81. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/lh5/conftest.py +0 -0
  82. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/lh5/test_lh5_datatype.py +0 -0
  83. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/lh5/test_lh5_tools.py +0 -0
  84. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/lh5/test_lh5_utils.py +0 -0
  85. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/test_cli.py +0 -0
  86. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/test_lgdo_utils.py +0 -0
  87. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_array.py +0 -0
  88. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_arrayofequalsizedarrays.py +0 -0
  89. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_encoded.py +0 -0
  90. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_fixedsizearray.py +0 -0
  91. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_representations.py +0 -0
  92. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_scalar.py +0 -0
  93. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_struct.py +0 -0
  94. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_table.py +0 -0
  95. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_table_eval.py +0 -0
  96. {legend_pydataobj-1.9.0 → legend_pydataobj-1.10.1}/tests/types/test_waveformtable.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: legend_pydataobj
-Version: 1.9.0
+Version: 1.10.1
 Summary: LEGEND Python Data Objects
 Author: The LEGEND Collaboration
 Maintainer: The LEGEND Collaboration
@@ -700,7 +700,7 @@ License-File: LICENSE
 Requires-Dist: awkward>=2
 Requires-Dist: awkward-pandas
 Requires-Dist: colorlog
-Requires-Dist: h5py>=3.2
+Requires-Dist: h5py>=3.10
 Requires-Dist: hdf5plugin
 Requires-Dist: hist
 Requires-Dist: numba!=0.53.*,!=0.54.*

pyproject.toml
@@ -34,7 +34,7 @@ dependencies = [
     "awkward>=2",
     "awkward-pandas",
     "colorlog",
-    "h5py>=3.2",
+    "h5py>=3.10",
     "hdf5plugin",
     "hist",
     "numba!=0.53.*,!=0.54.*",
@@ -137,6 +137,7 @@ ignore = [
   "PLR2004", # Magic value used in comparison
   "ISC001", # Conflicts with formatter
   "PT011",
+  "RUF013", # complains if you default to None for an asinine reason
 ]
 isort.required-imports = ["from __future__ import annotations"]
 # Uncomment if using a _compat.typing backport
@@ -145,6 +146,7 @@ isort.required-imports = ["from __future__ import annotations"]
 [tool.ruff.lint.per-file-ignores]
 "tests/**" = ["T20"]
 "noxfile.py" = ["T20"]
+"docs/source/notebooks/*" = ["T201", "E402"]
 
 [tool.pylint]
 py-version = "3.8"
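
Note: RUF013 is ruff's "implicit Optional" rule (PEP 484 disallows inferring Optional from a bare None default). The new notebook ignores are similar housekeeping: T201 flags print calls and E402 flags imports that are not at the top of the module, both routine in tutorial notebooks. A minimal illustration of what RUF013 would flag (function names made up):

    def scale(values: list[float], factor: float = None):  # RUF013: implicit Optional[float]
        ...

    def scale_explicit(values: list[float], factor: float | None = None):  # passes
        ...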
src/legend_pydataobj.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: legend_pydataobj
-Version: 1.9.0
+Version: 1.10.1
 Summary: LEGEND Python Data Objects
 Author: The LEGEND Collaboration
 Maintainer: The LEGEND Collaboration
@@ -700,7 +700,7 @@ License-File: LICENSE
 Requires-Dist: awkward>=2
 Requires-Dist: awkward-pandas
 Requires-Dist: colorlog
-Requires-Dist: h5py>=3.2
+Requires-Dist: h5py>=3.10
 Requires-Dist: hdf5plugin
 Requires-Dist: hist
 Requires-Dist: numba!=0.53.*,!=0.54.*

src/legend_pydataobj.egg-info/requires.txt
@@ -1,7 +1,7 @@
 awkward>=2
 awkward-pandas
 colorlog
-h5py>=3.2
+h5py>=3.10
 hdf5plugin
 hist
 numba!=0.53.*,!=0.54.*

src/lgdo/_version.py
@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '1.9.0'
-__version_tuple__ = version_tuple = (1, 9, 0)
+__version__ = version = '1.10.1'
+__version_tuple__ = version_tuple = (1, 10, 1)
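
Note: the version bump and the raised h5py floor appear several times above because PKG-INFO, its egg-info copy, requires.txt and _version.py are all generated files. To check which version is actually installed (standard-library call, distribution name as in PKG-INFO):

    from importlib.metadata import version

    print(version("legend_pydataobj"))  # e.g. "1.10.1"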
src/lgdo/compression/radware.py
@@ -441,15 +441,11 @@ def _radware_sigcompress_encode(
         while (i < sig_in.size) and (i < j + 48):
             si_i = int16(sig_in[i] + shift)
             si_im1 = int16(sig_in[i - 1] + shift)
-            if max1 < si_i:
-                max1 = si_i
-            if min1 > si_i:
-                min1 = si_i
+            max1 = max(max1, si_i)
+            min1 = min(min1, si_i)
             ds = si_i - si_im1
-            if max2 < ds:
-                max2 = ds
-            if min2 > ds:
-                min2 = ds
+            max2 = max(max2, ds)
+            min2 = min(min2, ds)
             nw += 1
             i += 1
         if max1 - min1 <= max2 - min2:  # use absolute values
@@ -460,15 +456,13 @@ def _radware_sigcompress_encode(
                 i < j + 128
             ):  # FIXME: 128 could be tuned better?
                 si_i = int16(sig_in[i] + shift)
-                if max1 < si_i:
-                    max1 = si_i
+                max1 = max(max1, si_i)
                 dd1 = max1 - min1
                 if min1 > si_i:
                     dd1 = max1 - si_i
                 if dd1 > mask[nb1]:
                     break
-                if min1 > si_i:
-                    min1 = si_i
+                min1 = min(min1, si_i)
                 nw += 1
                 i += 1
         else:  # use difference values
@@ -481,15 +475,13 @@ def _radware_sigcompress_encode(
                 si_i = int16(sig_in[i] + shift)
                 si_im1 = int16(sig_in[i - 1] + shift)
                 ds = si_i - si_im1
-                if max2 < ds:
-                    max2 = ds
+                max2 = max(max2, ds)
                 dd2 = max2 - min2
                 if min2 > ds:
                     dd2 = max2 - ds
                 if dd2 > mask[nb2]:
                     break
-                if min2 > ds:
-                    min2 = ds
+                min2 = min(min2, ds)
                 nw += 1
                 i += 1
 
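Note: the three radware.py hunks are a pure readability refactor: each compare-and-assign pair becomes a single call to Python's built-in max()/min(), which numba's nopython mode supports, so the compiled behavior is unchanged. A standalone sketch of the pattern (names and data made up):

    import numpy as np

    def running_extrema(sig):
        lo = hi = int(sig[0])
        for s in sig[1:]:
            hi = max(hi, int(s))  # was: if hi < s: hi = s
            lo = min(lo, int(s))  # was: if lo > s: lo = s
        return lo, hi

    assert running_extrema(np.array([3, 1, 4, 1, 5], dtype=np.int16)) == (1, 5)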
src/lgdo/lh5/_serializers/read/array.py (new file)
@@ -0,0 +1,34 @@
+from __future__ import annotations
+
+import logging
+
+from ....types import Array, ArrayOfEqualSizedArrays, FixedSizeArray
+from . import utils
+from .ndarray import _h5_read_ndarray
+
+log = logging.getLogger(__name__)
+
+
+def _h5_read_array_generic(type_, h5d, fname, oname, **kwargs):
+    nda, attrs, n_rows_to_read = _h5_read_ndarray(h5d, fname, oname, **kwargs)
+
+    obj_buf = kwargs["obj_buf"]
+
+    if obj_buf is None:
+        return type_(nda=nda, attrs=attrs), n_rows_to_read
+
+    utils.check_obj_buf_attrs(obj_buf.attrs, attrs, fname, oname)
+
+    return obj_buf, n_rows_to_read
+
+
+def _h5_read_array(h5d, fname, oname, **kwargs):
+    return _h5_read_array_generic(Array, h5d, fname, oname, **kwargs)
+
+
+def _h5_read_fixedsize_array(h5d, fname, oname, **kwargs):
+    return _h5_read_array_generic(FixedSizeArray, h5d, fname, oname, **kwargs)
+
+
+def _h5_read_array_of_equalsized_arrays(h5d, fname, oname, **kwargs):
+    return _h5_read_array_generic(ArrayOfEqualSizedArrays, h5d, fname, oname, **kwargs)
src/lgdo/lh5/_serializers/read/composite.py
@@ -23,7 +23,6 @@ from ....types import (
 )
 from ... import datatype as dtypeutils
 from ...exceptions import LH5DecodeError
-from ...utils import read_n_rows
 from . import utils
 from .array import (
     _h5_read_array,
@@ -42,6 +41,8 @@ log = logging.getLogger(__name__)
 
 def _h5_read_lgdo(
     h5o,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -51,69 +52,23 @@ def _h5_read_lgdo(
     obj_buf_start=0,
     decompress=True,
 ):
-    # Handle list-of-files recursively
-    if not isinstance(h5o, (h5py.Group, h5py.Dataset)):
-        lh5_objs = list(h5o)
-        n_rows_read = 0
-
-        for i, _h5o in enumerate(lh5_objs):
-            if isinstance(idx, list) and len(idx) > 0 and not np.isscalar(idx[0]):
-                # a list of lists: must be one per file
-                idx_i = idx[i]
-            elif idx is not None:
-                # make idx a proper tuple if it's not one already
-                if not (isinstance(idx, tuple) and len(idx) == 1):
-                    idx = (idx,)
-                # idx is a long continuous array
-                n_rows_i = read_n_rows(_h5o)
-                # find the length of the subset of idx that contains indices
-                # that are less than n_rows_i
-                n_rows_to_read_i = bisect.bisect_left(idx[0], n_rows_i)
-                # now split idx into idx_i and the remainder
-                idx_i = (idx[0][:n_rows_to_read_i],)
-                idx = (idx[0][n_rows_to_read_i:] - n_rows_i,)
-            else:
-                idx_i = None
-            n_rows_i = n_rows - n_rows_read
-
-            obj_buf, n_rows_read_i = _h5_read_lgdo(
-                _h5o,
-                start_row=start_row,
-                n_rows=n_rows_i,
-                idx=idx_i,
-                use_h5idx=use_h5idx,
-                field_mask=field_mask,
-                obj_buf=obj_buf,
-                obj_buf_start=obj_buf_start,
-                decompress=decompress,
-            )
-
-            n_rows_read += n_rows_read_i
-            if n_rows_read >= n_rows or obj_buf is None:
-                return obj_buf, n_rows_read
-            start_row = 0
-            obj_buf_start += n_rows_read_i
-
-        return obj_buf, n_rows_read
-
     log.debug(
-        f"reading {h5o.file.filename}:{h5o.name}[{start_row}:{n_rows}], decompress = {decompress}, "
+        f"reading {fname}:{oname}[{start_row}:{n_rows}], decompress = {decompress}, "
         + (f" with field mask {field_mask}" if field_mask else "")
     )
 
-    # make idx a proper tuple if it's not one already
-    if not (isinstance(idx, tuple) and len(idx) == 1) and idx is not None:
-        idx = (idx,)
-
+    attrs = utils.read_attrs(h5o, fname, oname)
     try:
-        lgdotype = dtypeutils.datatype(h5o.attrs["datatype"])
+        lgdotype = dtypeutils.datatype(attrs["datatype"])
     except KeyError as e:
         msg = "dataset not in file or missing 'datatype' attribute"
-        raise LH5DecodeError(msg, h5o) from e
+        raise LH5DecodeError(msg, fname, oname) from e
 
     if lgdotype is Scalar:
         return _h5_read_scalar(
             h5o,
+            fname,
+            oname,
             obj_buf=obj_buf,
         )
 
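Note: two things happen in this large hunk. First, the recursive handling of lists of files is removed from the low-level reader; given the store.py (+116 -12), core.py (+81 -7) and rewritten iterator.py (+498/-314) entries above, that multi-file logic presumably moved up the stack, though this section does not show where. Second, every reader now takes explicit fname/oname strings, so errors can name the file and object path without holding a live h5py handle, and LH5DecodeError is raised as LH5DecodeError(msg, fname, oname). A stand-in sketch of an exception with that shape (the real class lives in src/lgdo/lh5/exceptions.py, changed +3 -3 but not shown here):

    class LH5DecodeError(Exception):  # stand-in, not the package's exact code
        def __init__(self, message, fname, oname):
            super().__init__(
                f"while reading object '{oname}' in file '{fname}': {message}"
            )

    try:
        raise LH5DecodeError("missing 'datatype' attribute", "data.lh5", "/evt/energy")
    except LH5DecodeError as err:
        print(err)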
@@ -125,7 +80,7 @@ def _h5_read_lgdo(
         if len(field_mask) > 0:
             default = not field_mask[next(iter(field_mask.keys()))]
         field_mask = defaultdict(lambda: default, field_mask)
-    elif isinstance(field_mask, (list, tuple)):
+    elif isinstance(field_mask, (list, tuple, set)):
         field_mask = defaultdict(bool, {field: True for field in field_mask})
     elif not isinstance(field_mask, defaultdict):
         msg = "bad field_mask type"
@@ -134,6 +89,8 @@ def _h5_read_lgdo(
     if lgdotype is Struct:
         return _h5_read_struct(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -145,20 +102,21 @@ def _h5_read_lgdo(
     # Below here is all array-like types. So trim idx if needed
     if idx is not None:
         # check if idx is just an ordered list of the integers if so can ignore
-        if (idx[0] == np.arange(0, len(idx[0]), 1)).all():
-            if n_rows > len(idx[0]):
-                n_rows = len(idx[0])
+        if (idx == np.arange(0, len(idx), 1)).all():
+            n_rows = min(n_rows, len(idx))
             idx = None
         else:
             # chop off indices < start_row
-            i_first_valid = bisect.bisect_left(idx[0], start_row)
-            idxa = idx[0][i_first_valid:]
+            i_first_valid = bisect.bisect_left(idx, start_row)
+            idxa = idx[i_first_valid:]
             # don't readout more than n_rows indices
-            idx = (idxa[:n_rows],)  # works even if n_rows > len(idxa)
+            idx = idxa[:n_rows]  # works even if n_rows > len(idxa)
 
     if lgdotype is Table:
         return _h5_read_table(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
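
Note: idx also loses a layer of wrapping here: it used to travel as a 1-element tuple around the index array and is now a plain sorted array; field_mask may now be a set as well as a list, tuple or dict. The trimming logic itself is unchanged; a standalone sketch of what it computes (values made up):

    import bisect

    import numpy as np

    idx = np.array([2, 5, 7, 11])  # sorted global row indices to read
    start_row, n_rows = 4, 2

    i_first_valid = bisect.bisect_left(idx, start_row)  # drop indices < start_row
    idx = idx[i_first_valid:][:n_rows]                  # read at most n_rows of them
    assert idx.tolist() == [5, 7]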
@@ -172,6 +130,8 @@ def _h5_read_lgdo(
     if lgdotype is Histogram:
         return _h5_read_histogram(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -185,6 +145,8 @@ def _h5_read_lgdo(
     if lgdotype is ArrayOfEncodedEqualSizedArrays:
         return _h5_read_array_of_encoded_equalsized_arrays(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -197,6 +159,8 @@ def _h5_read_lgdo(
     if lgdotype is VectorOfEncodedVectors:
         return _h5_read_vector_of_encoded_vectors(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -209,6 +173,8 @@ def _h5_read_lgdo(
     if lgdotype is VectorOfVectors:
         return _h5_read_vector_of_vectors(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -220,6 +186,8 @@ def _h5_read_lgdo(
     if lgdotype is FixedSizeArray:
         return _h5_read_fixedsize_array(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -231,6 +199,8 @@ def _h5_read_lgdo(
     if lgdotype is ArrayOfEqualSizedArrays:
         return _h5_read_array_of_equalsized_arrays(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -242,6 +212,8 @@ def _h5_read_lgdo(
     if lgdotype is Array:
         return _h5_read_array(
             h5o,
+            fname,
+            oname,
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -251,11 +223,13 @@ def _h5_read_lgdo(
     )
 
     msg = f"no rule to decode {lgdotype.__name__} from LH5"
-    raise LH5DecodeError(msg, h5o)
+    raise LH5DecodeError(msg, fname, oname)
 
 
 def _h5_read_struct(
     h5g,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -268,7 +242,7 @@ def _h5_read_struct(
     # table... Maybe should emit a warning? Or allow them to be
     # dicts keyed by field name?
 
-    attrs = dict(h5g.attrs)
+    attrs = utils.read_attrs(h5g, fname, oname)
 
     # determine fields to be read out
     all_fields = dtypeutils.get_struct_fields(attrs["datatype"])
@@ -286,20 +260,26 @@ def _h5_read_struct(
     for field in selected_fields:
         # support for integer keys
        field_key = int(field) if attrs.get("int_keys") else str(field)
+        h5o = h5py.h5o.open(h5g, field.encode("utf-8"))
         obj_dict[field_key], _ = _h5_read_lgdo(
-            h5g[field],
+            h5o,
+            fname,
+            f"{oname}/{field}",
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
             use_h5idx=use_h5idx,
             decompress=decompress,
         )
+        h5o.close()
 
     return Struct(obj_dict=obj_dict, attrs=attrs), 1
 
 
 def _h5_read_table(
     h5g,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
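
Note: struct and table members are now opened with h5py's low-level API instead of h5g[field]: h5py.h5o.open takes a low-level group identifier and a bytes-encoded member name, returns a low-level object identifier, and is closed explicitly, avoiding high-level object construction on the hot path (plausibly related to the new h5py>=3.10 floor, though the diff does not say). A self-contained round trip using a throwaway file:

    import h5py
    import numpy as np

    with h5py.File("demo.lh5", "w") as f:
        f.create_group("evt").create_dataset("energy", data=np.arange(4.0))

    with h5py.File("demo.lh5", "r") as f:
        h5g = f["evt"].id                    # low-level GroupID of /evt
        h5o = h5py.h5o.open(h5g, b"energy")  # member name must be bytes
        print(h5py.Dataset(h5o)[...])        # wrap to read: [0. 1. 2. 3.]
        h5o.close()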
@@ -311,9 +291,9 @@ def _h5_read_table(
 ):
     if obj_buf is not None and not isinstance(obj_buf, Table):
         msg = "provided object buffer is not a Table"
-        raise LH5DecodeError(msg, h5g)
+        raise LH5DecodeError(msg, fname, oname)
 
-    attrs = dict(h5g.attrs)
+    attrs = utils.read_attrs(h5g, fname, oname)
 
     # determine fields to be read out
     all_fields = dtypeutils.get_struct_fields(attrs["datatype"])
@@ -334,12 +314,15 @@ def _h5_read_table(
         if obj_buf is not None:
             if not isinstance(obj_buf, Table) or field not in obj_buf:
                 msg = "provided object buffer is not a Table or columns are missing"
-                raise LH5DecodeError(msg, h5g)
+                raise LH5DecodeError(msg, fname, oname)
 
             fld_buf = obj_buf[field]
 
+        h5o = h5py.h5o.open(h5g, field.encode("utf-8"))
         col_dict[field], n_rows_read = _h5_read_lgdo(
-            h5g[field],
+            h5o,
+            fname,
+            f"{oname}/{field}",
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -348,6 +331,7 @@ def _h5_read_table(
             obj_buf_start=obj_buf_start,
             decompress=decompress,
         )
+        h5o.close()
 
         if obj_buf is not None and obj_buf_start + n_rows_read > len(obj_buf):
             obj_buf.resize(obj_buf_start + n_rows_read)
@@ -359,12 +343,12 @@ def _h5_read_table(
         n_rows_read = rows_read[0]
     else:
         n_rows_read = 0
-        log.warning(f"Table '{h5g.name}' has no fields specified by {field_mask=}")
+        log.warning(f"Table '{oname}' has no fields specified by {field_mask=}")
 
     for n in rows_read[1:]:
         if n != n_rows_read:
             log.warning(
-                f"Table '{h5g.name}' got strange n_rows_read = {n}, "
+                f"Table '{oname}' got strange n_rows_read = {n}, "
                 "{n_rows_read} was expected ({rows_read})"
             )
 
@@ -396,13 +380,15 @@ def _h5_read_table(
         obj_buf.loc = obj_buf_start + n_rows_read
 
     # check attributes
-    utils.check_obj_buf_attrs(obj_buf.attrs, attrs, h5g)
+    utils.check_obj_buf_attrs(obj_buf.attrs, attrs, fname, oname)
 
     return obj_buf, n_rows_read
 
 
 def _h5_read_histogram(
     h5g,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -414,17 +400,20 @@ def _h5_read_histogram(
 ):
     if obj_buf is not None or obj_buf_start != 0:
         msg = "reading a histogram into an existing object buffer is not supported"
-        raise LH5DecodeError(msg, h5g)
+        raise LH5DecodeError(msg, fname, oname)
 
     struct, n_rows_read = _h5_read_struct(
         h5g,
-        start_row,
-        n_rows,
-        idx,
-        use_h5idx,
-        field_mask,
-        decompress,
+        fname,
+        oname,
+        start_row=start_row,
+        n_rows=n_rows,
+        idx=idx,
+        use_h5idx=use_h5idx,
+        field_mask=field_mask,
+        decompress=decompress,
     )
+
     binning = []
     for _, a in struct.binning.items():
         be = a.binedges
@@ -434,7 +423,7 @@ def _h5_read_histogram(
             b = (be, None, None, None, a.closedleft.value)
         else:
             msg = "unexpected binning of histogram"
-            raise LH5DecodeError(msg, h5g)
+            raise LH5DecodeError(msg, fname, oname)
         ax = Histogram.Axis(*b)
         # copy attrs to "clone" the "whole" struct.
         ax.attrs = a.getattrs(datatype=True)
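
Note: in the binning loop above, an axis that stores explicit bin edges (an Array) yields b = (be, None, None, None, closedleft); judging by the tuple's other slots, the alternative branch carries a regular first/last/step description, and Histogram.Axis(*b) rebuilds the axis from whichever form is present. A rough standalone sketch of the two forms (stand-in code, not the package's Axis API):

    import numpy as np

    def edges_from_binning(first=None, last=None, step=None, edges=None):
        if edges is not None:
            return np.asarray(edges)  # variable binning: edges stored explicitly
        return np.arange(first, last + step, step)  # regular binning: rebuild edges

    assert edges_from_binning(first=0, last=10, step=2).tolist() == [0, 2, 4, 6, 8, 10]
    assert edges_from_binning(edges=[0.0, 1.5, 4.0]).tolist() == [0.0, 1.5, 4.0]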
src/lgdo/lh5/_serializers/read/encoded.py
@@ -3,6 +3,8 @@ from __future__ import annotations
 import logging
 import sys
 
+import h5py
+
 from .... import compression as compress
 from ....types import (
     ArrayOfEncodedEqualSizedArrays,
@@ -13,6 +15,7 @@ from .array import (
     _h5_read_array,
 )
 from .scalar import _h5_read_scalar
+from .utils import read_attrs
 from .vector_of_vectors import _h5_read_vector_of_vectors
 
 log = logging.getLogger(__name__)
@@ -20,21 +23,29 @@ log = logging.getLogger(__name__)
 
 def _h5_read_array_of_encoded_equalsized_arrays(
     h5g,
+    fname,
+    oname,
     **kwargs,
 ):
-    return _h5_read_encoded_array(ArrayOfEncodedEqualSizedArrays, h5g, **kwargs)
+    return _h5_read_encoded_array(
+        ArrayOfEncodedEqualSizedArrays, h5g, fname, oname, **kwargs
+    )
 
 
 def _h5_read_vector_of_encoded_vectors(
     h5g,
+    fname,
+    oname,
     **kwargs,
 ):
-    return _h5_read_encoded_array(VectorOfEncodedVectors, h5g, **kwargs)
+    return _h5_read_encoded_array(VectorOfEncodedVectors, h5g, fname, oname, **kwargs)
 
 
 def _h5_read_encoded_array(
     lgdotype,
     h5g,
+    fname,
+    oname,
     start_row=0,
     n_rows=sys.maxsize,
     idx=None,
@@ -45,11 +56,11 @@ def _h5_read_encoded_array(
 ):
     if lgdotype not in (ArrayOfEncodedEqualSizedArrays, VectorOfEncodedVectors):
         msg = f"unsupported read of encoded type {lgdotype.__name__}"
-        raise LH5DecodeError(msg, h5g)
+        raise LH5DecodeError(msg, fname, oname)
 
     if not decompress and obj_buf is not None and not isinstance(obj_buf, lgdotype):
         msg = f"object buffer is not a {lgdotype.__name__}"
-        raise LH5DecodeError(msg, h5g)
+        raise LH5DecodeError(msg, fname, oname)
 
     # read out decoded_size, either a Scalar or an Array
     decoded_size_buf = encoded_data_buf = None
@@ -58,8 +69,11 @@ def _h5_read_encoded_array(
         encoded_data_buf = obj_buf.encoded_data
 
     if lgdotype is VectorOfEncodedVectors:
+        h5o = h5py.h5o.open(h5g, b"decoded_size")
         decoded_size, _ = _h5_read_array(
-            h5g["decoded_size"],
+            h5o,
+            fname,
+            f"{oname}/decoded_size",
             start_row=start_row,
             n_rows=n_rows,
             idx=idx,
@@ -67,16 +81,23 @@ def _h5_read_encoded_array(
             obj_buf=None if decompress else decoded_size_buf,
             obj_buf_start=0 if decompress else obj_buf_start,
         )
-
+        h5o.close()
     else:
+        h5o = h5py.h5o.open(h5g, b"decoded_size")
         decoded_size, _ = _h5_read_scalar(
-            h5g["decoded_size"],
+            h5o,
+            fname,
+            f"{oname}/decoded_size",
             obj_buf=None if decompress else decoded_size_buf,
         )
+        h5o.close()
 
     # read out encoded_data, a VectorOfVectors
+    h5o = h5py.h5o.open(h5g, b"encoded_data")
     encoded_data, n_rows_read = _h5_read_vector_of_vectors(
-        h5g["encoded_data"],
+        h5o,
+        fname,
+        f"{oname}/encoded_data",
         start_row=start_row,
        n_rows=n_rows,
        idx=idx,
@@ -84,6 +105,7 @@ def _h5_read_encoded_array(
         obj_buf=None if decompress else encoded_data_buf,
         obj_buf_start=0 if decompress else obj_buf_start,
     )
+    h5o.close()
 
     # return the still encoded data in the buffer object, if there
     if obj_buf is not None and not decompress:
@@ -93,7 +115,7 @@ def _h5_read_encoded_array(
     rawdata = lgdotype(
         encoded_data=encoded_data,
         decoded_size=decoded_size,
-        attrs=dict(h5g.attrs),
+        attrs=read_attrs(h5g, fname, oname),
     )
 
     # already return if no decompression is requested