legend-pydataobj 1.12.0a1__tar.gz → 1.12.0a3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/PKG-INFO +1 -1
  2. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/pyproject.toml +2 -2
  3. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/legend_pydataobj.egg-info/PKG-INFO +1 -1
  4. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/legend_pydataobj.egg-info/SOURCES.txt +0 -1
  5. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/__init__.py +5 -4
  6. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/_version.py +2 -2
  7. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/lh5/__init__.py +1 -3
  8. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/lh5/_serializers/write/composite.py +2 -2
  9. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/lh5/core.py +5 -1
  10. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/lh5/store.py +7 -7
  11. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/lh5/tools.py +0 -111
  12. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/lh5/utils.py +5 -3
  13. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/types/array.py +17 -9
  14. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/types/vectorofvectors.py +64 -19
  15. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/types/vovutils.py +1 -1
  16. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/types/waveformtable.py +19 -21
  17. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/compression/test_radware_sigcompress.py +2 -2
  18. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/conftest.py +3 -3
  19. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/lh5/test_lh5_utils.py +2 -1
  20. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/lh5/test_lh5_write.py +21 -21
  21. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/types/test_vectorofvectors.py +98 -34
  22. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/types/test_vovutils.py +52 -0
  23. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/types/test_waveformtable.py +13 -0
  24. legend_pydataobj-1.12.0a1/src/lgdo/lh5_store.py +0 -284
  25. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/LICENSE +0 -0
  26. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/README.md +0 -0
  27. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/setup.cfg +0 -0
  28. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/legend_pydataobj.egg-info/dependency_links.txt +0 -0
  29. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/legend_pydataobj.egg-info/entry_points.txt +0 -0
  30. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/legend_pydataobj.egg-info/not-zip-safe +0 -0
  31. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/legend_pydataobj.egg-info/requires.txt +0 -0
  32. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/legend_pydataobj.egg-info/top_level.txt +0 -0
  33. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/cli.py +0 -0
  34. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/compression/__init__.py +0 -0
  35. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/compression/base.py +0 -0
  36. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/compression/generic.py +0 -0
  37. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/compression/radware.py +0 -0
  38. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/compression/utils.py +0 -0
  39. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/compression/varlen.py +0 -0
  40. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/lgdo_utils.py +0 -0
  41. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/lh5/_serializers/__init__.py +0 -0
  42. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/lh5/_serializers/read/__init__.py +0 -0
  43. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/lh5/_serializers/read/array.py +0 -0
  44. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/lh5/_serializers/read/composite.py +0 -0
  45. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/lh5/_serializers/read/encoded.py +0 -0
  46. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/lh5/_serializers/read/ndarray.py +0 -0
  47. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/lh5/_serializers/read/scalar.py +0 -0
  48. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/lh5/_serializers/read/utils.py +0 -0
  49. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/lh5/_serializers/read/vector_of_vectors.py +0 -0
  50. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/lh5/_serializers/write/__init__.py +0 -0
  51. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/lh5/_serializers/write/array.py +0 -0
  52. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/lh5/_serializers/write/scalar.py +0 -0
  53. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/lh5/_serializers/write/vector_of_vectors.py +0 -0
  54. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/lh5/concat.py +0 -0
  55. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/lh5/datatype.py +0 -0
  56. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/lh5/exceptions.py +0 -0
  57. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/lh5/iterator.py +0 -0
  58. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/logging.py +0 -0
  59. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/types/__init__.py +0 -0
  60. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/types/arrayofequalsizedarrays.py +0 -0
  61. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/types/encoded.py +0 -0
  62. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/types/fixedsizearray.py +0 -0
  63. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/types/histogram.py +0 -0
  64. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/types/lgdo.py +0 -0
  65. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/types/scalar.py +0 -0
  66. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/types/struct.py +0 -0
  67. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/types/table.py +0 -0
  68. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/units.py +0 -0
  69. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/src/lgdo/utils.py +0 -0
  70. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/compression/conftest.py +0 -0
  71. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/compression/sigcompress/LDQTA_r117_20200110T105115Z_cal_geds_raw-0.dat +0 -0
  72. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/compression/sigcompress/special-wf-clipped.dat +0 -0
  73. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/compression/test_compression.py +0 -0
  74. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/compression/test_str2wfcodec.py +0 -0
  75. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/compression/test_uleb128_zigzag_diff.py +0 -0
  76. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/lh5/conftest.py +0 -0
  77. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/lh5/test_concat.py +0 -0
  78. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/lh5/test_core.py +0 -0
  79. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/lh5/test_exceptions.py +0 -0
  80. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/lh5/test_lh5_datatype.py +0 -0
  81. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/lh5/test_lh5_iterator.py +0 -0
  82. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/lh5/test_lh5_store.py +0 -0
  83. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/lh5/test_lh5_tools.py +0 -0
  84. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/test_cli.py +0 -0
  85. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/test_lgdo_utils.py +0 -0
  86. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/types/test_array.py +0 -0
  87. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/types/test_arrayofequalsizedarrays.py +0 -0
  88. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/types/test_encoded.py +0 -0
  89. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/types/test_fixedsizearray.py +0 -0
  90. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/types/test_histogram.py +0 -0
  91. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/types/test_representations.py +0 -0
  92. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/types/test_scalar.py +0 -0
  93. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/types/test_struct.py +0 -0
  94. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/types/test_table.py +0 -0
  95. {legend_pydataobj-1.12.0a1 → legend_pydataobj-1.12.0a3}/tests/types/test_table_eval.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: legend_pydataobj
-Version: 1.12.0a1
+Version: 1.12.0a3
 Summary: LEGEND Python Data Objects
 Author: The LEGEND Collaboration
 Maintainer: The LEGEND Collaboration
@@ -122,7 +122,7 @@ extend-select = [
   "PIE", # flake8-pie
   "PL", # pylint
   "PT", # flake8-pytest-style
-  #"PTH", # flake8-use-pathlib
+  "PTH", # flake8-use-pathlib
   "RET", # flake8-return
   "RUF", # Ruff-specific
   "SIM", # flake8-simplify
@@ -167,7 +167,7 @@ minversion = "6.0"
 addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"]
 xfail_strict = true
 filterwarnings = ["error", 'ignore:\nPyarrow:DeprecationWarning']
-log_cli_level = "info"
+log_cli_level = "INFO"
 testpaths = "tests"
 
 [tool.codespell]
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: legend_pydataobj
-Version: 1.12.0a1
+Version: 1.12.0a3
 Summary: LEGEND Python Data Objects
 Author: The LEGEND Collaboration
 Maintainer: The LEGEND Collaboration
@@ -12,7 +12,6 @@ src/lgdo/__init__.py
 src/lgdo/_version.py
 src/lgdo/cli.py
 src/lgdo/lgdo_utils.py
-src/lgdo/lh5_store.py
 src/lgdo/logging.py
 src/lgdo/units.py
 src/lgdo/utils.py
@@ -45,7 +45,7 @@ browsed easily in python like any `HDF5 <https://www.hdfgroup.org>`_ file using
 from __future__ import annotations
 
 from ._version import version as __version__
-from .lh5_store import LH5Iterator, LH5Store, load_dfs, load_nda, ls, show
+from .lh5 import LH5Iterator, ls, read, read_as, read_n_rows, show, write
 from .types import (
     LGDO,
     Array,
@@ -69,7 +69,6 @@ __all__ = [
     "FixedSizeArray",
     "Histogram",
     "LH5Iterator",
-    "LH5Store",
     "Scalar",
     "Struct",
     "Table",
@@ -77,8 +76,10 @@ __all__ = [
     "VectorOfVectors",
     "WaveformTable",
     "__version__",
-    "load_dfs",
-    "load_nda",
     "ls",
+    "read",
+    "read_as",
+    "read_n_rows",
     "show",
+    "write",
 ]
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '1.12.0a1'
-__version_tuple__ = version_tuple = (1, 12, 0)
+__version__ = version = '1.12.0a3'
+__version_tuple__ = version_tuple = (1, 12, 0, 'a3')
@@ -11,7 +11,7 @@ from ._serializers.write.array import DEFAULT_HDF5_SETTINGS
 from .core import read, read_as, write
 from .iterator import LH5Iterator
 from .store import LH5Store
-from .tools import load_dfs, load_nda, ls, show
+from .tools import ls, show
 from .utils import read_n_rows
 
 __all__ = [
@@ -19,8 +19,6 @@ __all__ = [
     "LH5Iterator",
     "LH5Store",
     "concat",
-    "load_dfs",
-    "load_nda",
     "ls",
     "read",
     "read_as",
@@ -1,8 +1,8 @@
 from __future__ import annotations
 
 import logging
-import os
 from inspect import signature
+from pathlib import Path
 
 import h5py
 
@@ -53,7 +53,7 @@ def _h5_write_lgdo(
     # change any object in the file. So we use file:append for
     # write_object:overwrite.
     if not isinstance(lh5_file, h5py.File):
-        mode = "w" if wo_mode == "of" or not os.path.exists(lh5_file) else "a"
+        mode = "w" if wo_mode == "of" or not Path(lh5_file).exists() else "a"
         lh5_file = h5py.File(lh5_file, mode=mode, **file_kwargs)
 
     log.debug(
@@ -113,7 +113,11 @@ def read(
         lh5_obj = lh5_file[name]
     elif isinstance(lh5_file, str):
         lh5_file = h5py.File(lh5_file, mode="r", locking=locking)
-        lh5_obj = lh5_file[name]
+        try:
+            lh5_obj = lh5_file[name]
+        except KeyError as ke:
+            err = f"Object {name} not found in file {lh5_file.filename}"
+            raise KeyError(err) from ke
     else:
         if obj_buf is not None:
             obj_buf.resize(obj_buf_start)
@@ -6,11 +6,11 @@ HDF5 files.
 from __future__ import annotations
 
 import logging
-import os
 import sys
 from collections import OrderedDict
 from collections.abc import Mapping, Sequence
 from inspect import signature
+from pathlib import Path
 from typing import Any
 
 import h5py
@@ -92,16 +92,16 @@ class LH5Store:
             return self.files[lh5_file]
 
         if self.base_path != "":
-            full_path = os.path.join(self.base_path, lh5_file)
+            full_path = Path(self.base_path) / lh5_file
         else:
-            full_path = lh5_file
+            full_path = Path(lh5_file)
 
-        file_exists = os.path.exists(full_path)
+        file_exists = full_path.exists()
         if mode != "r":
-            directory = os.path.dirname(full_path)
-            if directory != "" and not os.path.exists(directory):
+            directory = full_path.parent
+            if directory != "" and not full_path.parent.exists():
                 log.debug(f"making path {directory}")
-                os.makedirs(directory)
+                directory.mkdir(parents=True, exist_ok=True)
 
         if mode == "r" and not file_exists:
             msg = f"file {full_path} not found"
@@ -1,16 +1,10 @@
 from __future__ import annotations
 
 import fnmatch
-import glob
 import logging
-import os
 from copy import copy
-from warnings import warn
 
 import h5py
-import numpy as np
-import pandas as pd
-from numpy.typing import NDArray
 
 from . import utils
 from .store import LH5Store
@@ -223,108 +217,3 @@ def show(
                 break
 
             key = k_new
-
-
-def load_nda(
-    f_list: str | list[str],
-    par_list: list[str],
-    lh5_group: str = "",
-    idx_list: list[NDArray | list | tuple] | None = None,
-) -> dict[str, NDArray]:
-    r"""Build a dictionary of :class:`numpy.ndarray`\ s from LH5 data.
-
-    Given a list of files, a list of LH5 table parameters, and an optional
-    group path, return a NumPy array with all values for each parameter.
-
-    Parameters
-    ----------
-    f_list
-        A list of files. Can contain wildcards.
-    par_list
-        A list of parameters to read from each file.
-    lh5_group
-        group path within which to find the specified parameters.
-    idx_list
-        for fancy-indexed reads. Must be one index array for each file in
-        `f_list`.
-
-    Returns
-    -------
-    par_data
-        A dictionary of the parameter data keyed by the elements of `par_list`.
-        Each entry contains the data for the specified parameter concatenated
-        over all files in `f_list`.
-    """
-    warn(
-        "load_nda() is deprecated. "
-        "Please replace it with LH5Store.read(...).view_as('np'), "
-        "or just read_as(..., 'np'). "
-        "load_nda() will be removed in a future release.",
-        DeprecationWarning,
-        stacklevel=2,
-    )
-
-    if isinstance(f_list, str):
-        f_list = [f_list]
-        if idx_list is not None:
-            idx_list = [idx_list]
-    if idx_list is not None and len(f_list) != len(idx_list):
-        msg = f"f_list length ({len(f_list)}) != idx_list length ({len(idx_list)})!"
-        raise ValueError(msg)
-
-    # Expand wildcards
-    f_list = [f for f_wc in f_list for f in sorted(glob.glob(os.path.expandvars(f_wc)))]
-
-    sto = LH5Store()
-    par_data = {par: [] for par in par_list}
-    for ii, ff in enumerate(f_list):
-        f = sto.gimme_file(ff, "r")
-        for par in par_list:
-            if f"{lh5_group}/{par}" not in f:
-                msg = f"'{lh5_group}/{par}' not in file {ff}"
-                raise RuntimeError(msg)
-
-            if idx_list is None:
-                data, _ = sto.read(f"{lh5_group}/{par}", f)
-            else:
-                data, _ = sto.read(f"{lh5_group}/{par}", f, idx=idx_list[ii])
-            if not data:
-                continue
-            par_data[par].append(data.nda)
-    return {par: np.concatenate(par_data[par]) for par in par_list}
-
-
-def load_dfs(
-    f_list: str | list[str],
-    par_list: list[str],
-    lh5_group: str = "",
-    idx_list: list[NDArray | list | tuple] | None = None,
-) -> pd.DataFrame:
-    """Build a :class:`pandas.DataFrame` from LH5 data.
-
-    Given a list of files (can use wildcards), a list of LH5 columns, and
-    optionally the group path, return a :class:`pandas.DataFrame` with all
-    values for each parameter.
-
-    See Also
-    --------
-    :func:`load_nda`
-
-    Returns
-    -------
-    dataframe
-        contains columns for each parameter in `par_list`, and rows containing
-        all data for the associated parameters concatenated over all files in
-        `f_list`.
-    """
-    warn(
-        "load_dfs() is deprecated. "
-        "Please replace it with LH5Store.read(...).view_as('pd'), "
-        "or just read_as(..., 'pd'). "
-        "load_dfs() will be removed in a future release.",
-        DeprecationWarning,
-        stacklevel=2,
-    )
-    return pd.DataFrame(
-        load_nda(f_list, par_list, lh5_group=lh5_group, idx_list=idx_list)
-    )
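The deprecation messages in the removed load_nda()/load_dfs() above already name their replacements: LH5Store.read(...).view_as(...) or the module-level read_as(..., "np"/"pd"). A minimal migration sketch follows; the file name, group, and column ("data.lh5", "geds/raw", "energy") are hypothetical placeholders:

    from lgdo import lh5

    # before (removed here):
    #   energies = lh5.load_nda(["data.lh5"], ["energy"], lh5_group="geds/raw")["energy"]
    #   df = lh5.load_dfs(["data.lh5"], ["energy"], lh5_group="geds/raw")

    # after: read the LH5 object and view it with the desired library
    energies = lh5.read_as("geds/raw/energy", "data.lh5", "np")  # numpy ndarray
    df = lh5.read_as("geds/raw", "data.lh5", "pd")               # pandas DataFrame

    # or keep the LGDO object around and convert explicitly
    tbl = lh5.read("geds/raw", "data.lh5")
    df = tbl.view_as("pd")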
@@ -7,6 +7,7 @@ import logging
 import os
 import string
 from collections.abc import Mapping, Sequence
+from pathlib import Path
 from typing import Any
 
 import h5py
@@ -183,14 +184,15 @@ def expand_path(
         Unique absolute path, or list of all absolute paths
     """
     if base_path is not None and base_path != "":
-        base_path = os.path.expanduser(os.path.expandvars(base_path))
-        path = os.path.join(base_path, path)
+        base_path = Path(os.path.expandvars(base_path)).expanduser()
+        path = base_path / path
 
     # first expand variables
     _path = expand_vars(path, substitute)
 
     # then expand wildcards
-    paths = sorted(glob.glob(os.path.expanduser(_path)))
+    # pathlib glob works differently so use glob for now
+    paths = sorted(glob.glob(str(Path(_path).expanduser())))  # noqa: PTH207
 
     if base_path is not None and base_path != "":
         paths = [os.path.relpath(p, base_path) for p in paths]
@@ -6,7 +6,7 @@ corresponding utilities.
 from __future__ import annotations
 
 import logging
-from collections.abc import Iterator
+from collections.abc import Collection, Iterator
 from typing import Any
 
 import awkward as ak
@@ -126,19 +126,27 @@ class Array(LGDOCollection):
         "Set capacity to be minimum needed to support Array size"
         self.reserve_capacity(np.prod(self.shape))
 
-    def resize(self, new_size: int, trim=False) -> None:
+    def resize(self, new_size: int | Collection[int], trim=False) -> None:
         """Set size of Array in rows. Only change capacity if it must be
         increased to accommodate new rows; in this case double capacity.
-        If trim is True, capacity will be set to match size."""
+        If trim is True, capacity will be set to match size. If new_size
+        is an int, do not change size of inner dimensions.
 
-        self._size = new_size
+        If new_size is a collection, internal memory will be re-allocated, so
+        this should be done only rarely!"""
 
-        if trim and new_size != self.get_capacity:
-            self.reserve_capacity(new_size)
+        if isinstance(new_size, Collection):
+            self._size = new_size[0]
+            self._nda.resize(new_size)
+        else:
+            self._size = new_size
+
+            if trim and new_size != self.get_capacity:
+                self.reserve_capacity(new_size)
 
-        # If capacity is not big enough, set to next power of 2 big enough
-        if new_size > self.get_capacity():
-            self.reserve_capacity(int(2 ** (np.ceil(np.log2(new_size)))))
+            # If capacity is not big enough, set to next power of 2 big enough
+            if new_size > self.get_capacity():
+                self.reserve_capacity(int(2 ** (np.ceil(np.log2(new_size)))))
 
     def append(self, value: np.ndarray) -> None:
         "Append value to end of array (with copy)"
@@ -130,20 +130,48 @@ class VectorOfVectors(LGDOCollection):
 
             # ak.to_buffer helps in de-serialization
             # NOTE: ak.to_packed() needed?
-            form, length, container = ak.to_buffers(ak.to_packed(data))
-
-            # NOTE: node#-data is not even in the dict if the awkward array is empty
-            # NOTE: if the data arg was a numpy array, to_buffers() preserves
-            # the original dtype
-            # FIXME: have to copy the buffers, otherwise self will not own the
-            # data and self.resize() will fail. Is it possible to avoid this?
-            flattened_data = np.copy(
-                container.pop(f"node{data.ndim - 1}-data", np.empty(0, dtype=dtype))
-            )
+            form, _, container = ak.to_buffers(ak.to_packed(data))
+
+            # check if bytestring
+            curr = form
+            for _ in range(data.ndim - 1):
+                curr = curr.content
+            if (
+                "__array__" in curr.parameters
+                and curr.parameters["__array__"] == "bytestring"
+            ):
+                diffs = np.diff(container[f"node{data.ndim - 1}-offsets"])
+                if (diffs != diffs[0]).all():
+                    err_msg = "Non uniform string lengths not supported"
+                    raise NotImplementedError(err_msg)
+                flattened_data = np.asarray(
+                    ak.enforce_type(
+                        ak.unflatten(
+                            container.pop(
+                                f"node{data.ndim}-data", np.empty(0, dtype=dtype)
+                            ),
+                            diffs[0],
+                        ),
+                        "bytes",
+                    )
+                )
 
-            # if user-provided dtype is different than dtype from Awkward, cast
-            # NOTE: makes a copy only if needed
-            flattened_data = np.asarray(flattened_data, dtype=dtype)
+                # if user-provided dtype is different than dtype from Awkward, cast
+                # NOTE: makes a copy only if needed
+                flattened_data = np.asarray(flattened_data, dtype=dtype)
+            else:
+                # NOTE: node#-data is not even in the dict if the awkward array is empty
+                # NOTE: if the data arg was a numpy array, to_buffers() preserves
+                # the original dtype
+                # FIXME: have to copy the buffers, otherwise self will not own the
+                # data and self.resize() will fail. Is it possible to avoid this?
+                flattened_data = np.copy(
+                    container.pop(f"node{data.ndim - 1}-data", np.empty(0, dtype=dtype))
+                )
+
+                # if user-provided dtype is different than dtype from Awkward, cast
+                # NOTE: makes a copy only if needed
+                flattened_data = np.asarray(flattened_data, dtype=dtype)
 
             # start from innermost VoV and build nested structure
             for i in range(data.ndim - 2, -1, -1):
@@ -476,7 +504,10 @@ class VectorOfVectors(LGDOCollection):
         else:
             nan_val = np.nan
         vovutils._nb_fill(
-            vec, lens, nan_val, self.flattened_data.nda[start : cum_lens[-1]]
+            vec,
+            lens,
+            np.array([nan_val]).astype(self.flattened_data.nda.dtype),
+            self.flattened_data.nda[start : cum_lens[-1]],
         )
 
         # add new vector(s) length to cumulative_length
@@ -627,11 +658,25 @@ class VectorOfVectors(LGDOCollection):
             offsets[1:] = self.cumulative_length.nda
             offsets[0] = 0
 
-            content = (
-                ak.contents.NumpyArray(self.flattened_data.nda)
-                if self.ndim == 2
-                else self.flattened_data.view_as(library, with_units=with_units).layout
-            )
+            if self.ndim != 2:
+                content = self.flattened_data.view_as(
+                    library, with_units=with_units
+                ).layout
+            # need to handle strings separately
+            elif np.issubdtype(self.flattened_data.nda.dtype, np.bytes_):
+                byte_arrays = []
+                for s in self.flattened_data.nda:
+                    # Convert each string to array of bytes
+                    byte_array = np.frombuffer(s, dtype=np.uint8)
+                    byte_arrays.append(byte_array)
+                max_len = max(len(b) for b in byte_arrays)
+                raw_arrays = ak.contents.NumpyArray(np.concatenate(byte_arrays))
+                array_of_chars = ak.contents.RegularArray(
+                    raw_arrays, max_len, parameters={"__array__": "bytes"}
+                )
+                content = ak.enforce_type(array_of_chars, "bytes", highlevel=False)
+            else:
+                content = ak.contents.NumpyArray(self.flattened_data.nda)
 
             layout = ak.contents.ListOffsetArray(
                 offsets=ak.index.Index(offsets),
@@ -131,7 +131,7 @@ def _nb_fill(
     for i, ll in enumerate(len_in):
         stop = start + ll
         if ll > max_len:
-            flattened_array_out[start : start + max_len] = aoa_in[i, :]
+            flattened_array_out[start : start + max_len] = aoa_in[i, :max_len]
             flattened_array_out[start + max_len : stop] = nan_val
         else:
            flattened_array_out[start:stop] = aoa_in[i, :ll]
@@ -112,12 +112,10 @@ class WaveformTable(Table):
         if not isinstance(t0, Array):
             shape = (size,)
             t0_dtype = t0.dtype if hasattr(t0, "dtype") else np.float32
-            nda = (
-                t0 if isinstance(t0, np.ndarray) else np.full(shape, t0, dtype=t0_dtype)
-            )
-            if nda.shape != shape:
-                nda.resize(shape, refcheck=True)
-            t0 = Array(nda=nda)
+            if isinstance(t0, np.ndarray):
+                t0 = Array(nda=t0, shape=shape, dtype=t0_dtype)
+            else:
+                t0 = Array(fill_val=t0, shape=shape, dtype=t0_dtype)
 
         if t0_units is not None:
             t0.attrs["units"] = f"{t0_units}"
@@ -125,12 +123,11 @@ class WaveformTable(Table):
         if not isinstance(dt, Array):
             shape = (size,)
             dt_dtype = dt.dtype if hasattr(dt, "dtype") else np.float32
-            nda = (
-                dt if isinstance(dt, np.ndarray) else np.full(shape, dt, dtype=dt_dtype)
-            )
-            if nda.shape != shape:
-                nda.resize(shape, refcheck=True)
-            dt = Array(nda=nda)
+            if isinstance(dt, np.ndarray):
+                dt = Array(nda=dt, shape=shape, dtype=dt_dtype)
+            else:
+                dt = Array(fill_val=dt, shape=shape, dtype=dt_dtype)
+
         if dt_units is not None:
             dt.attrs["units"] = f"{dt_units}"
 
@@ -174,14 +171,15 @@ class WaveformTable(Table):
                 if hasattr(values, "dtype")
                 else np.dtype(np.float64)
             )
-            nda = (
-                values
-                if isinstance(values, np.ndarray)
-                else np.zeros(shape, dtype=dtype)
-            )
-            if nda.shape != shape:
-                nda.resize(shape, refcheck=True)
-            values = ArrayOfEqualSizedArrays(dims=(1, 1), nda=nda)
+            if isinstance(values, np.ndarray):
+                values = ArrayOfEqualSizedArrays(
+                    dims=(1, 1), nda=values, shape=shape, dtype=dtype
+                )
+            else:
+                values = ArrayOfEqualSizedArrays(
+                    dims=(1, 1), fill_val=0, shape=shape, dtype=dtype
+                )
+
         if values_units is not None:
             values.attrs["units"] = f"{values_units}"
 
@@ -215,7 +213,7 @@ class WaveformTable(Table):
             return
         shape = self.values.nda.shape
         shape = (shape[0], wf_len)
-        self.values.nda.resize(shape, refcheck=True)
+        self.values.resize(shape)
 
     def resize_wf_len(self, new_len: int) -> None:
         """Alias for `wf_len.setter`, for when we want to make it clear in
@@ -22,7 +22,7 @@ def read_sigcompress_c_output(filename: str):
     enc_wf_c = np.empty(0, dtype=np.uint16)
     nsig_c = None
     shift = None
-    with open(filename) as f:
+    with Path(filename).open() as f:
         nsig_c = int(f.readline())  # first number in the file
         shift = int(f.readline())  # second number in the file
         for line in f.readlines():  # then the waveform
@@ -35,7 +35,7 @@ def read_sigcompress_c_output_multi(filename: str):
     enc_wf_c = []
     nsig_c = np.empty(0, dtype="uint32")
     shift = np.empty(0, dtype="int32")
-    with open(filename) as f:
+    with Path(filename).open() as f:
         for line in f:
             parts = line.split()
             nsig_c = np.append(nsig_c, np.uint32(parts[0]))
@@ -1,20 +1,20 @@
 from __future__ import annotations
 
-import os
 import shutil
 import uuid
 from getpass import getuser
+from pathlib import Path
 from tempfile import gettempdir
 
 import pytest
 from legendtestdata import LegendTestData
 
-_tmptestdir = os.path.join(gettempdir(), f"lgdo-tests-{getuser()}-{uuid.uuid4()!s}")
+_tmptestdir = Path(gettempdir()) / f"lgdo-tests-{getuser()}-{uuid.uuid4()!s}"
 
 
 @pytest.fixture(scope="session")
 def tmptestdir():
-    os.mkdir(_tmptestdir)
+    Path(_tmptestdir).mkdir(parents=True, exist_ok=True)
     return _tmptestdir
 
 
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import os
+from pathlib import Path
 
 import pytest
 
@@ -36,7 +37,7 @@ def test_expand_path(lgnd_test_data):
             "lh5/prod-ref-l200/generated/tier/dsp/cal/p03/r001/l200-p03-r001-cal-20230318T012228Z-tier_dsp.lh5"
         ),
     ]
-    base_dir = os.path.dirname(files[0])
+    base_dir = Path(files[0]).parent
 
     assert utils.expand_path(f"{base_dir}/*20230318T012144Z*") == files[0]