legend-pydataobj 1.11.8__py3-none-any.whl → 1.11.10__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between these versions as they appear in their public registry.
lgdo/lh5/store.py CHANGED
@@ -5,20 +5,21 @@ HDF5 files.
  
  from __future__ import annotations
  
+ import bisect
  import logging
+ import os
  import sys
  from collections import OrderedDict
  from collections.abc import Mapping, Sequence
  from inspect import signature
- from pathlib import Path
  from typing import Any
  
  import h5py
+ import numpy as np
  from numpy.typing import ArrayLike
  
  from .. import types
  from . import _serializers, utils
- from .core import read
  
  log = logging.getLogger(__name__)
  
@@ -92,16 +93,16 @@ class LH5Store:
  return self.files[lh5_file]
  
  if self.base_path != "":
- full_path = Path(self.base_path) / lh5_file
+ full_path = os.path.join(self.base_path, lh5_file)
  else:
- full_path = Path(lh5_file)
+ full_path = lh5_file
  
- file_exists = full_path.exists()
+ file_exists = os.path.exists(full_path)
  if mode != "r":
- directory = full_path.parent
- if directory != "" and not full_path.parent.exists():
+ directory = os.path.dirname(full_path)
+ if directory != "" and not os.path.exists(directory):
  log.debug(f"making path {directory}")
- directory.mkdir(parents=True, exist_ok=True)
+ os.makedirs(directory)
  
  if mode == "r" and not file_exists:
  msg = f"file {full_path} not found"
@@ -154,7 +155,7 @@ class LH5Store:
  """Returns an LH5 object appropriate for use as a pre-allocated buffer
  in a read loop. Sets size to `size` if object has a size.
  """
- obj = self.read(name, lh5_file, n_rows=0, field_mask=field_mask)
+ obj, n_rows = self.read(name, lh5_file, n_rows=0, field_mask=field_mask)
  if hasattr(obj, "resize") and size is not None:
  obj.resize(new_size=size)
  return obj
@@ -181,20 +182,72 @@ class LH5Store:
  """
  # grab files from store
  if isinstance(lh5_file, (str, h5py.File)):
- h5f = self.gimme_file(lh5_file, "r", **file_kwargs)
+ lh5_obj = self.gimme_file(lh5_file, "r", **file_kwargs)[name]
  else:
- h5f = [self.gimme_file(f, "r", **file_kwargs) for f in lh5_file]
- return read(
- name,
- h5f,
- start_row,
- n_rows,
- idx,
- use_h5idx,
- field_mask,
- obj_buf,
- obj_buf_start,
- decompress,
+ lh5_files = list(lh5_file)
+ n_rows_read = 0
+
+ for i, h5f in enumerate(lh5_files):
+ if (
+ isinstance(idx, (list, tuple))
+ and len(idx) > 0
+ and not np.isscalar(idx[0])
+ ):
+ # a list of lists: must be one per file
+ idx_i = idx[i]
+ elif idx is not None:
+ # make idx a proper tuple if it's not one already
+ if not (isinstance(idx, tuple) and len(idx) == 1):
+ idx = (idx,)
+ # idx is a long continuous array
+ n_rows_i = utils.read_n_rows(name, h5f)
+ # find the length of the subset of idx that contains indices
+ # that are less than n_rows_i
+ n_rows_to_read_i = bisect.bisect_left(idx[0], n_rows_i)
+ # now split idx into idx_i and the remainder
+ idx_i = np.array(idx[0])[:n_rows_to_read_i]
+ idx = np.array(idx[0])[n_rows_to_read_i:] - n_rows_i
+ else:
+ idx_i = None
+ n_rows_i = n_rows - n_rows_read
+
+ obj_buf, n_rows_read_i = self.read(
+ name,
+ h5f,
+ start_row,
+ n_rows_i,
+ idx_i,
+ use_h5idx,
+ field_mask,
+ obj_buf,
+ obj_buf_start,
+ decompress,
+ )
+
+ n_rows_read += n_rows_read_i
+ if n_rows_read >= n_rows or obj_buf is None:
+ return obj_buf, n_rows_read
+ start_row = 0
+ obj_buf_start += n_rows_read_i
+ return obj_buf, n_rows_read
+
+ if isinstance(idx, (list, tuple)) and len(idx) > 0 and not np.isscalar(idx[0]):
+ idx = idx[0]
+ if isinstance(idx, np.ndarray) and idx.dtype == np.dtype("?"):
+ idx = np.where(idx)[0]
+
+ return _serializers._h5_read_lgdo(
+ lh5_obj.id,
+ lh5_obj.file.filename,
+ lh5_obj.name,
+ start_row=start_row,
+ n_rows=n_rows,
+ idx=idx,
+ use_h5idx=use_h5idx,
+ field_mask=field_mask,
+ obj_buf=obj_buf,
+ obj_buf_start=obj_buf_start,
+ decompress=decompress,
  )
  
  def write(
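Editor's note: `LH5Store.read()` now iterates over a list of files itself instead of delegating to `lgdo.lh5.core.read`. For a single, file-spanning index array it uses `bisect.bisect_left` to take the indices that fall inside the current file and shifts the remainder down by that file's row count before moving to the next file. The self-contained sketch below illustrates just that splitting step; `split_global_idx` and `n_rows_per_file` are illustrative names, and the real code obtains per-file row counts from `utils.read_n_rows`.

```python
import bisect

import numpy as np


def split_global_idx(idx, n_rows_per_file):
    """Illustration of the bisect-based splitting used in the read loop above.

    `idx` is a sorted, file-spanning index array; `n_rows_per_file` is a
    hypothetical list of row counts, one per file.
    """
    per_file = []
    idx = np.asarray(idx)
    for n_rows_i in n_rows_per_file:
        # indices < n_rows_i belong to the current file
        cut = bisect.bisect_left(idx, n_rows_i)
        per_file.append(idx[:cut])
        # shift the remainder so it is relative to the next file
        idx = idx[cut:] - n_rows_i
    return per_file


# example: rows 0-9 live in file A, rows 0-4 in file B
print(split_global_idx([2, 5, 11, 13], [10, 5]))
# -> [array([2, 5]), array([1, 3])]
```

With `[2, 5, 11, 13]` spread over files of 10 and 5 rows, the split yields `[2, 5]` for the first file and the local indices `[1, 3]` for the second.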
lgdo/lh5/tools.py CHANGED
@@ -1,10 +1,16 @@
  from __future__ import annotations
  
  import fnmatch
+ import glob
  import logging
+ import os
  from copy import copy
+ from warnings import warn
  
  import h5py
+ import numpy as np
+ import pandas as pd
+ from numpy.typing import NDArray
  
  from . import utils
  from .store import LH5Store
@@ -217,3 +223,108 @@ def show(
  break
  
  key = k_new
+
+
+ def load_nda(
+ f_list: str | list[str],
+ par_list: list[str],
+ lh5_group: str = "",
+ idx_list: list[NDArray | list | tuple] | None = None,
+ ) -> dict[str, NDArray]:
+ r"""Build a dictionary of :class:`numpy.ndarray`\ s from LH5 data.
+
+ Given a list of files, a list of LH5 table parameters, and an optional
+ group path, return a NumPy array with all values for each parameter.
+
+ Parameters
+ ----------
+ f_list
+ A list of files. Can contain wildcards.
+ par_list
+ A list of parameters to read from each file.
+ lh5_group
+ group path within which to find the specified parameters.
+ idx_list
+ for fancy-indexed reads. Must be one index array for each file in
+ `f_list`.
+
+ Returns
+ -------
+ par_data
+ A dictionary of the parameter data keyed by the elements of `par_list`.
+ Each entry contains the data for the specified parameter concatenated
+ over all files in `f_list`.
+ """
+ warn(
+ "load_nda() is deprecated. "
+ "Please replace it with LH5Store.read(...).view_as('np'), "
+ "or just read_as(..., 'np'). "
+ "load_nda() will be removed in a future release.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+
+ if isinstance(f_list, str):
+ f_list = [f_list]
+ if idx_list is not None:
+ idx_list = [idx_list]
+ if idx_list is not None and len(f_list) != len(idx_list):
+ msg = f"f_list length ({len(f_list)}) != idx_list length ({len(idx_list)})!"
+ raise ValueError(msg)
+
+ # Expand wildcards
+ f_list = [f for f_wc in f_list for f in sorted(glob.glob(os.path.expandvars(f_wc)))]
+
+ sto = LH5Store()
+ par_data = {par: [] for par in par_list}
+ for ii, ff in enumerate(f_list):
+ f = sto.gimme_file(ff, "r")
+ for par in par_list:
+ if f"{lh5_group}/{par}" not in f:
+ msg = f"'{lh5_group}/{par}' not in file {ff}"
+ raise RuntimeError(msg)
+
+ if idx_list is None:
+ data, _ = sto.read(f"{lh5_group}/{par}", f)
+ else:
+ data, _ = sto.read(f"{lh5_group}/{par}", f, idx=idx_list[ii])
+ if not data:
+ continue
+ par_data[par].append(data.nda)
+ return {par: np.concatenate(par_data[par]) for par in par_list}
+
+
+ def load_dfs(
+ f_list: str | list[str],
+ par_list: list[str],
+ lh5_group: str = "",
+ idx_list: list[NDArray | list | tuple] | None = None,
+ ) -> pd.DataFrame:
+ """Build a :class:`pandas.DataFrame` from LH5 data.
+
+ Given a list of files (can use wildcards), a list of LH5 columns, and
+ optionally the group path, return a :class:`pandas.DataFrame` with all
+ values for each parameter.
+
+ See Also
+ --------
+ :func:`load_nda`
+
+ Returns
+ -------
+ dataframe
+ contains columns for each parameter in `par_list`, and rows containing
+ all data for the associated parameters concatenated over all files in
+ `f_list`.
+ """
+ warn(
+ "load_dfs() is deprecated. "
+ "Please replace it with LH5Store.read(...).view_as('pd'), "
+ "or just read_as(..., 'pd'). "
+ "load_dfs() will be removed in a future release.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ return pd.DataFrame(
+ load_nda(f_list, par_list, lh5_group=lh5_group, idx_list=idx_list)
+ )
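Editor's note: `load_nda()` and `load_dfs()` return to `lgdo.lh5.tools`, but both immediately emit a `DeprecationWarning` pointing at `read(...).view_as(...)` and `read_as(...)`. A hedged migration sketch follows; the file names and group path are placeholders, and the `read_as` argument order is an assumption taken from the warning text, so check it against the `lgdo.lh5` documentation.

```python
from lgdo import lh5

store = lh5.LH5Store()

# deprecated helper re-added by this release (emits DeprecationWarning)
arrays = lh5.load_nda(["run0.lh5", "run1.lh5"], ["energy"], "geds/raw")

# replacements suggested by the warning text:
# read() now returns (LGDO object, number of rows read)
obj, n_rows = store.read("geds/raw/energy", "run0.lh5")
energy_np = obj.view_as("np")

# or read straight into a pandas view (argument order assumed here)
energy_df = lh5.read_as("geds/raw/energy", "run0.lh5", "pd")
```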
lgdo/lh5/utils.py CHANGED
@@ -7,7 +7,6 @@ import logging
  import os
  import string
  from collections.abc import Mapping, Sequence
- from pathlib import Path
  from typing import Any
  
  import h5py
@@ -154,7 +153,7 @@ def expand_vars(expr: str, substitute: dict[str, str] | None = None) -> str:
  
  # use provided mapping
  # then expand env variables
- return os.path.expandvars(string.Template(str(expr)).safe_substitute(substitute))
+ return os.path.expandvars(string.Template(expr).safe_substitute(substitute))
  
  
  def expand_path(
@@ -184,15 +183,14 @@
  Unique absolute path, or list of all absolute paths
  """
  if base_path is not None and base_path != "":
- base_path = Path(os.path.expandvars(base_path)).expanduser()
- path = base_path / path
+ base_path = os.path.expanduser(os.path.expandvars(base_path))
+ path = os.path.join(base_path, path)
  
  # first expand variables
  _path = expand_vars(path, substitute)
  
  # then expand wildcards
- # pathlib glob works differently so use glob for now
- paths = sorted(glob.glob(str(Path(_path).expanduser())))  # noqa: PTH207
+ paths = sorted(glob.glob(os.path.expanduser(_path)))
  
  if base_path is not None and base_path != "":
  paths = [os.path.relpath(p, base_path) for p in paths]
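Editor's note: `expand_vars()` and `expand_path()` likewise drop `pathlib` and keep the whole expansion pipeline on `string.Template`, `os.path`, and `glob`: substitute the user-provided mapping, expand environment variables, expand `~`, then glob with sorted results. The condensed sketch below is illustrative only; the function name and the flattening of the two helpers into one are not part of the package.

```python
import glob
import os
import string


def expand_path_sketch(path: str, substitute: dict | None = None) -> list[str]:
    """Hypothetical condensation of expand_vars + expand_path from the diff above."""
    # 1. substitute user-provided $VARS, leaving unknown ones untouched
    expanded = string.Template(path).safe_substitute(substitute or {})
    # 2. expand remaining environment variables
    expanded = os.path.expandvars(expanded)
    # 3. expand ~ and resolve wildcards, sorted for reproducibility
    return sorted(glob.glob(os.path.expanduser(expanded)))


# e.g. expand_path_sketch("$DATADIR/run*.lh5", {"DATADIR": "/tmp/data"})
```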
lgdo/lh5_store.py ADDED
@@ -0,0 +1,284 @@
+ """
+ .. warning::
+ This subpackage is deprecated, use :mod:`lgdo.lh5`.
+ """
+
+ from __future__ import annotations
+
+ import sys
+ from collections.abc import Iterator
+ from typing import Union
+ from warnings import warn
+
+ import h5py
+ import numpy as np
+ import pandas as pd
+
+ from . import lh5
+ from .types import (
+ Array,
+ ArrayOfEncodedEqualSizedArrays,  # noqa: F401
+ ArrayOfEqualSizedArrays,  # noqa: F401
+ FixedSizeArray,  # noqa: F401
+ Histogram,  # noqa: F401
+ Scalar,
+ Struct,
+ Table,  # noqa: F401
+ VectorOfEncodedVectors,  # noqa: F401
+ VectorOfVectors,
+ WaveformTable,  # noqa: F401
+ )
+
+ LGDO = Union[Array, Scalar, Struct, VectorOfVectors]
+
+
+ class LH5Iterator(lh5.LH5Iterator):
+ """
+ .. warning::
+ This class is deprecated, use :class:`lgdo.lh5.iterator.LH5Iterator`.
+
+ """
+
+ def __init__(
+ self,
+ lh5_files: str | list[str],
+ groups: str | list[str],
+ base_path: str = "",
+ entry_list: list[int] | list[list[int]] | None = None,
+ entry_mask: list[bool] | list[list[bool]] | None = None,
+ field_mask: dict[str, bool] | list[str] | tuple[str] | None = None,
+ buffer_len: int = 3200,
+ friend: Iterator | None = None,
+ ) -> None:
+ warn(
+ "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Iterator."
+ "Please replace 'from lgdo.lh5_store import LH5Iterator' with 'from lgdo.lh5 import LH5Iterator'."
+ "lgdo.lh5_store will be removed in a future release.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ super().__init__(
+ lh5_files,
+ groups,
+ base_path,
+ entry_list,
+ entry_mask,
+ field_mask,
+ buffer_len,
+ friend,
+ )
+
+ def write_object(
+ self,
+ obj: LGDO,
+ name: str,
+ lh5_file: str | h5py.File,
+ group: str | h5py.Group = "/",
+ start_row: int = 0,
+ n_rows: int | None = None,
+ wo_mode: str = "append",
+ write_start: int = 0,
+ **h5py_kwargs,
+ ) -> None:
+ """
+ .. warning::
+ This method is deprecated, use :meth:`lgdo.lh5.iterator.LH5Iterator.write`.
+
+ """
+ warn(
+ "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Iterator. "
+ "The object you are calling this function from uses the old LH5Iterator class."
+ "Please replace 'from lgdo.lh5_store import LH5Iterator' with 'from lgdo.lh5 import LH5Iterator'."
+ "lgdo.lh5_store will be removed in a future release.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ self.write(
+ obj,
+ name,
+ lh5_file,
+ group,
+ start_row,
+ n_rows,
+ wo_mode,
+ write_start,
+ h5py_kwargs,
+ )
+
+ def read_object(
+ self,
+ name: str,
+ lh5_file: str | h5py.File | list[str | h5py.File],
+ start_row: int = 0,
+ n_rows: int = sys.maxsize,
+ idx: np.ndarray | list | tuple | list[np.ndarray | list | tuple] = None,
+ field_mask: dict[str, bool] | list[str] | tuple[str] | None = None,
+ obj_buf: LGDO = None,
+ obj_buf_start: int = 0,
+ decompress: bool = True,
+ ) -> tuple[LGDO, int]:
+ """
+ .. warning::
+ This method is deprecated, use :meth:`lgdo.lh5.iterator.LH5Iterator.read`.
+
+ """
+ warn(
+ "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Iterator. "
+ "The object you are calling this function from uses the old LH5Iterator class."
+ "Please replace 'from lgdo.lh5_store import LH5Iterator' with 'from lgdo.lh5 import LH5Iterator'."
+ "lgdo.lh5_store will be removed in a future release.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ return self.read(
+ name,
+ lh5_file,
+ start_row,
+ n_rows,
+ idx,
+ field_mask,
+ obj_buf,
+ obj_buf_start,
+ decompress,
+ )
+
+
+ class LH5Store(lh5.LH5Store):
+ """
+ .. warning::
+ This class is deprecated, use :class:`lgdo.lh5.iterator.LH5Store`.
+
+ """
+
+ def __init__(self, base_path: str = "", keep_open: bool = False):
+ warn(
+ "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store. "
+ "Please replace 'from lgdo.lh5_store import LH5Store' with 'from lgdo.lh5 import LH5Store'."
+ "lgdo.lh5_store will be removed in a future release.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ super().__init__(base_path, keep_open)
+
+ def read_object(
+ self,
+ name: str,
+ lh5_file: str | h5py.File | list[str | h5py.File],
+ **kwargs,
+ ) -> tuple[LGDO, int]:
+ """
+ .. warning::
+ This method is deprecated, use :meth:`lgdo.lh5.store.LH5Store.read`.
+
+ """
+ warn(
+ "LH5Store.read_object() has been renamed to LH5Store.read(), "
+ "Please update your code."
+ "LH5Store.read_object() will be removed in a future release.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ return super().read(self, name, lh5_file, **kwargs)
+
+ def write_object(
+ self,
+ obj: LGDO,
+ name: str,
+ lh5_file: str | h5py.File,
+ **kwargs,
+ ) -> tuple[LGDO, int]:
+ """
+ .. warning::
+ This method is deprecated, use :meth:`lgdo.lh5.store.LH5Store.write`.
+
+ """
+ warn(
+ "LH5Store.write_object() has been renamed to LH5Store.write(), "
+ "Please update your code."
+ "LH5Store.write_object() will be removed in a future release.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ return super().read(self, obj, name, lh5_file, **kwargs)
+
+
+ def load_dfs(
+ f_list: str | list[str],
+ par_list: list[str],
+ lh5_group: str = "",
+ idx_list: list[np.ndarray | list | tuple] | None = None,
+ ) -> pd.DataFrame:
+ """
+ .. warning::
+ This function is deprecated, use :meth:`lgdo.types.lgdo.LGDO.view_as` to
+ view LGDO data as a Pandas data structure.
+
+ """
+ warn(
+ "lgdo.lh5_store has moved to a subfolder lgdo.lh5. "
+ "Please replace 'from lgdo.lh5_store import load_dfs' with 'from lgdo.lh5 import load_dfs'. "
+ "lgdo.lh5_store will be removed in a future release.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ return lh5.load_dfs(f_list, par_list, lh5_group, idx_list)
+
+
+ def load_nda(
+ f_list: str | list[str],
+ par_list: list[str],
+ lh5_group: str = "",
+ idx_list: list[np.ndarray | list | tuple] | None = None,
+ ) -> dict[str, np.ndarray]:
+ """
+ .. warning::
+ This function is deprecated, use :meth:`lgdo.types.lgdo.LGDO.view_as` to
+ view LGDO data as a NumPy data structure.
+
+ """
+ warn(
+ "lgdo.lh5_store has moved to a subfolder lgdo.lh5. "
+ "Please replace 'from lgdo.lh5_store import load_nda' with 'from lgdo.lh5 import load_nda'. "
+ "lgdo.lh5_store will be removed in a future release.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ return lh5.load_nda(f_list, par_list, lh5_group, idx_list)
+
+
+ def ls(lh5_file: str | h5py.Group, lh5_group: str = "") -> list[str]:
+ """
+ .. warning::
+ This function is deprecated, import :func:`lgdo.lh5.tools.ls`.
+
+ """
+ warn(
+ "lgdo.lh5_store has moved to a subfolder lgdo.lh5. "
+ "Please replace 'from lgdo.lh5_store import ls' with 'from lgdo.lh5 import ls'. "
+ "lgdo.lh5_store will be removed in a future release.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ return lh5.ls(lh5_file, lh5_group)
+
+
+ def show(
+ lh5_file: str | h5py.Group,
+ lh5_group: str = "/",
+ attrs: bool = False,
+ indent: str = "",
+ header: bool = True,
+ ) -> None:
+ """
+ .. warning::
+ This function is deprecated, import :func:`lgdo.lh5.tools.show`.
+
+ """
+ warn(
+ "lgdo.lh5_store has moved to a subfolder lgdo.lh5. "
+ "Please replace 'from lgdo.lh5_store import show' with 'from lgdo.lh5 import show'. "
+ "lgdo.lh5_store will be removed in a future release.",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ lh5.show(lh5_file, lh5_group, attrs, indent, header)
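Editor's note: the restored `lgdo.lh5_store` module is a thin compatibility shim; every class and function forwards to `lgdo.lh5` after emitting a `DeprecationWarning`. A small sketch of what callers should expect, assuming an environment with `legend-pydataobj` 1.11.10 installed:

```python
import warnings

# the shim defers its warning to the constructors/functions, so using the
# old class (not merely importing the module) is what triggers it
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    from lgdo.lh5_store import LH5Store as OldLH5Store

    OldLH5Store()

print(any(issubclass(w.category, DeprecationWarning) for w in caught))  # True

# preferred import path, per the warning text
from lgdo.lh5 import LH5Store

store = LH5Store()
```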