legend-pydataobj 1.12.0a1__py3-none-any.whl → 1.12.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: legend_pydataobj
3
- Version: 1.12.0a1
3
+ Version: 1.12.0a3
4
4
  Summary: LEGEND Python Data Objects
5
5
  Author: The LEGEND Collaboration
6
6
  Maintainer: The LEGEND Collaboration
@@ -1,9 +1,8 @@
1
- legend_pydataobj-1.12.0a1.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
2
- lgdo/__init__.py,sha256=QMYK9HhoMi0pbahPN8mPD18gyTxscFgo7QKfCxVhy-0,3196
3
- lgdo/_version.py,sha256=kTYHwRhTzZEJHpwJeVgXBi4yFTeQDpnR6MYkvCMA06Q,515
1
+ legend_pydataobj-1.12.0a3.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
2
+ lgdo/__init__.py,sha256=fkRv79kdtBasw31gPVK9SdLQ2vEEajTV2t3UPDvFg9o,3206
3
+ lgdo/_version.py,sha256=0ujGt6htU-oZxWpmS96KCcv4YV2Uo7Akjbaoi2K7Od8,521
4
4
  lgdo/cli.py,sha256=s_EWTBWW76l7zWb6gaTSTjiT-0RzzcYEmjeFEQCVxfk,4647
5
5
  lgdo/lgdo_utils.py,sha256=6a2YWEwpyEMXlAyTHZMO01aqxy6SxJzPZkGNWKNWuS0,2567
6
- lgdo/lh5_store.py,sha256=5BzbJA9sLcqjp8bJDc2olwOiw0VS6rmfg3cfh1kQkRY,8512
7
6
  lgdo/logging.py,sha256=82wIOj7l7xr3WYyeHdpSXbbjzHJsy-uRyKYUYx2vMfQ,1003
8
7
  lgdo/units.py,sha256=VQYME86_ev9S7Fq8RyCOQNqYr29MphTTYemmEouZafk,161
9
8
  lgdo/utils.py,sha256=WRTmXnaQ-h2hVxwJ27qiOigdsD3DHcaDrdDjvupCuZU,3940
@@ -13,15 +12,15 @@ lgdo/compression/generic.py,sha256=tF3UhLJbUDcovLxpIzgQRxFSjZ5Fz3uDRy9kI4mFntQ,2
13
12
  lgdo/compression/radware.py,sha256=GcNTtjuyL7VBBqziUBmSqNXuhqy1bJJgvcyvyumPtrc,23839
14
13
  lgdo/compression/utils.py,sha256=W2RkBrxPpXlat84dnU9Ad7d_tTws0irtGl7O1dNWjnk,1140
15
14
  lgdo/compression/varlen.py,sha256=6ZZUItyoOfygDdE0DyoISeFZfqdbH6xl7T0eclfarzg,15127
16
- lgdo/lh5/__init__.py,sha256=y1XE_mpFWwamrl7WVjAVSVB25X4PrEfdVXSneSQEmlQ,825
15
+ lgdo/lh5/__init__.py,sha256=UTzKGmpgFoHwVB_yNULvJsHD_uQQGl-R87l-3QBkh7w,773
17
16
  lgdo/lh5/concat.py,sha256=BZCgK7TWPKK8fMmha8K83d3bC31FVO1b5LOW7x-Ru1s,6186
18
- lgdo/lh5/core.py,sha256=GjosZGUp4GSO5FtWV9eXUt_6DGU_OwJXODlj5K1j93M,13320
17
+ lgdo/lh5/core.py,sha256=nULH5UoRjUCH0E3Z0-OH_DbFz2PRAQP73Qaf1kfnyPE,13481
19
18
  lgdo/lh5/datatype.py,sha256=O_7BqOlX8PFMyG0ppkfUT5aps5HEqX0bpuKcJO3jhu0,1691
20
19
  lgdo/lh5/exceptions.py,sha256=3kj8avXl4eBGvebl3LG12gJEmw91W0T8PYR0AfvUAyM,1211
21
20
  lgdo/lh5/iterator.py,sha256=1ob9B7Bf3ioGCtZkUZoL6ibTxAwLf4ld8_33ghVVEa4,20498
22
- lgdo/lh5/store.py,sha256=MYbMt-Mc7izELxuyLlSrrYrylCIzxc2CLzZYIVbZ33w,8455
23
- lgdo/lh5/tools.py,sha256=T9CgHA8A3_tVBMtiNJ6hATQKhdqI61m3cX4p2wGKc6c,9937
24
- lgdo/lh5/utils.py,sha256=ioz8DlyXZsejwnU2qYdIccdHcF12H62jgLkZsiDOLSM,6243
21
+ lgdo/lh5/store.py,sha256=qkBm3gPbr1R2UlQpUuDR5sGRMzpYJBWFL8fDIry6tmQ,8474
22
+ lgdo/lh5/tools.py,sha256=drtJWHY82wCFuFr6LVVnm2AQgs_wZuFmAvyOB4tcOHs,6431
23
+ lgdo/lh5/utils.py,sha256=D5w-3fRLIuN971pMi3RttJZVVmaFJzt25Gfyyp6TUfc,6331
25
24
  lgdo/lh5/_serializers/__init__.py,sha256=eZzxMp1SeZWG0PkEXUiCz3XyprQ8EmelHUmJogC8xYE,1263
26
25
  lgdo/lh5/_serializers/read/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
26
  lgdo/lh5/_serializers/read/array.py,sha256=uWfMCihfAmW2DE2ewip2qCK_kvQC_mb2zvOv26uzijc,1000
@@ -33,11 +32,11 @@ lgdo/lh5/_serializers/read/utils.py,sha256=YfSqPO-83A1XvhhuULxQ0Qz2A5ODa3sb7ApNx
33
32
  lgdo/lh5/_serializers/read/vector_of_vectors.py,sha256=765P8mElGArAaEPkHTAUXFQ47t1_3-3BQAete0LckBQ,7207
34
33
  lgdo/lh5/_serializers/write/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
34
  lgdo/lh5/_serializers/write/array.py,sha256=66DKnW2yqIBlUGNBPWcE-m4W0B2-nTKusDHGX9m6GY0,3223
36
- lgdo/lh5/_serializers/write/composite.py,sha256=I6lH0nWFIpAfZyG4-0rLxzg3mfazZ_FEhQVp1FZ0aA4,9254
35
+ lgdo/lh5/_serializers/write/composite.py,sha256=JYoLT9intT_Y4xPeL_l7CSd22O0ZKyEmd0flKkWWPFA,9268
37
36
  lgdo/lh5/_serializers/write/scalar.py,sha256=JPt_fcdTKOSFp5hfJdcKIfK4hxhcD8vhOlvDF-7btQ8,763
38
37
  lgdo/lh5/_serializers/write/vector_of_vectors.py,sha256=puGQX9XF5P_5DVbm_Cc6TvPrsDywgBLSYtkqFNltbB4,3493
39
38
  lgdo/types/__init__.py,sha256=DNfOErPiAZg-7Gygkp6ZKAi20Yrm1mfderZHvKo1Y4s,821
40
- lgdo/types/array.py,sha256=e3p93yrfzSmyBgWdGqqtETcKpM7_FxENaAErru15rvo,8904
39
+ lgdo/types/array.py,sha256=vxViJScqKw4zGUrrIOuuU_9Y0oTfOkEEhs0TOyUYjwI,9284
41
40
  lgdo/types/arrayofequalsizedarrays.py,sha256=DOGJiTmc1QCdm7vLbE6uIRXoMPtt8uuCfmwQawgWf5s,4949
42
41
  lgdo/types/encoded.py,sha256=_e8u_BPfpjJbLnEdyTo9QG3kbNsGj0BN4gjdj3L1ndw,15640
43
42
  lgdo/types/fixedsizearray.py,sha256=7RjUwTz1bW0pcrdy27JlfrXPAuOU89Kj7pOuSUCojK8,1527
@@ -46,11 +45,11 @@ lgdo/types/lgdo.py,sha256=21YNtJCHnSO3M60rjsAdbMO5crDjL_0BtuFpudZ2xvU,4500
46
45
  lgdo/types/scalar.py,sha256=c5Es2vyDqyWTPV6mujzfIzMpC1jNWkEIcvYyWQUxH3Q,1933
47
46
  lgdo/types/struct.py,sha256=Q0OWLVd4B0ciLb8t6VsxU3MPbmGLZ7WfQNno1lSQS0Q,4918
48
47
  lgdo/types/table.py,sha256=FkWesoEA9bmGGSW8Ewig1Zs77ffUoR_nggfYSmkWpjU,20079
49
- lgdo/types/vectorofvectors.py,sha256=CtPR2WDBmJmzzfXwH4aUcNMB5LvTiGWmL_qRbFah3to,24756
50
- lgdo/types/vovutils.py,sha256=WjvPLEJrRNjktnbyfypfgxZX-K_aOvcwPygfzoknsyA,10701
51
- lgdo/types/waveformtable.py,sha256=f2tS4f1OEoYaTM5ldCX9zmw8iSISCT3t3wS1SrPdu_o,9901
52
- legend_pydataobj-1.12.0a1.dist-info/METADATA,sha256=55pMph32j8h4LKGnoVEdvHX27bHr8k__sdT4L9O5dIA,44445
53
- legend_pydataobj-1.12.0a1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
54
- legend_pydataobj-1.12.0a1.dist-info/entry_points.txt,sha256=0KWfnwbuwhNn0vPUqARukjp04Ca6lzfZBSirouRmk7I,76
55
- legend_pydataobj-1.12.0a1.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
56
- legend_pydataobj-1.12.0a1.dist-info/RECORD,,
48
+ lgdo/types/vectorofvectors.py,sha256=GbAKV_ehXN4XdWSwnmKS_ErCiudRetcH_3wo7iDrVjw,26854
49
+ lgdo/types/vovutils.py,sha256=LW3ZcwECxVYxxcFadAtY3nnK-9-rk8Xbg_m8hY30lo4,10708
50
+ lgdo/types/waveformtable.py,sha256=9S_NMg894NZTGt2pLuskwH4-zQ5EbLnzWI6FVui6fXE,9827
51
+ legend_pydataobj-1.12.0a3.dist-info/METADATA,sha256=6KWUi7oveauluZsDiYwncWRg5ix2LHmzOeomT_Or1TI,44445
52
+ legend_pydataobj-1.12.0a3.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
53
+ legend_pydataobj-1.12.0a3.dist-info/entry_points.txt,sha256=0KWfnwbuwhNn0vPUqARukjp04Ca6lzfZBSirouRmk7I,76
54
+ legend_pydataobj-1.12.0a3.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
55
+ legend_pydataobj-1.12.0a3.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (78.1.0)
2
+ Generator: setuptools (79.0.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
lgdo/__init__.py CHANGED
@@ -45,7 +45,7 @@ browsed easily in python like any `HDF5 <https://www.hdfgroup.org>`_ file using
45
45
  from __future__ import annotations
46
46
 
47
47
  from ._version import version as __version__
48
- from .lh5_store import LH5Iterator, LH5Store, load_dfs, load_nda, ls, show
48
+ from .lh5 import LH5Iterator, ls, read, read_as, read_n_rows, show, write
49
49
  from .types import (
50
50
  LGDO,
51
51
  Array,
@@ -69,7 +69,6 @@ __all__ = [
69
69
  "FixedSizeArray",
70
70
  "Histogram",
71
71
  "LH5Iterator",
72
- "LH5Store",
73
72
  "Scalar",
74
73
  "Struct",
75
74
  "Table",
@@ -77,8 +76,10 @@ __all__ = [
77
76
  "VectorOfVectors",
78
77
  "WaveformTable",
79
78
  "__version__",
80
- "load_dfs",
81
- "load_nda",
82
79
  "ls",
80
+ "read",
81
+ "read_as",
82
+ "read_n_rows",
83
83
  "show",
84
+ "write",
84
85
  ]
lgdo/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '1.12.0a1'
21
- __version_tuple__ = version_tuple = (1, 12, 0)
20
+ __version__ = version = '1.12.0a3'
21
+ __version_tuple__ = version_tuple = (1, 12, 0, 'a3')
lgdo/lh5/__init__.py CHANGED
@@ -11,7 +11,7 @@ from ._serializers.write.array import DEFAULT_HDF5_SETTINGS
11
11
  from .core import read, read_as, write
12
12
  from .iterator import LH5Iterator
13
13
  from .store import LH5Store
14
- from .tools import load_dfs, load_nda, ls, show
14
+ from .tools import ls, show
15
15
  from .utils import read_n_rows
16
16
 
17
17
  __all__ = [
@@ -19,8 +19,6 @@ __all__ = [
19
19
  "LH5Iterator",
20
20
  "LH5Store",
21
21
  "concat",
22
- "load_dfs",
23
- "load_nda",
24
22
  "ls",
25
23
  "read",
26
24
  "read_as",
@@ -1,8 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import logging
4
- import os
5
4
  from inspect import signature
5
+ from pathlib import Path
6
6
 
7
7
  import h5py
8
8
 
@@ -53,7 +53,7 @@ def _h5_write_lgdo(
53
53
  # change any object in the file. So we use file:append for
54
54
  # write_object:overwrite.
55
55
  if not isinstance(lh5_file, h5py.File):
56
- mode = "w" if wo_mode == "of" or not os.path.exists(lh5_file) else "a"
56
+ mode = "w" if wo_mode == "of" or not Path(lh5_file).exists() else "a"
57
57
  lh5_file = h5py.File(lh5_file, mode=mode, **file_kwargs)
58
58
 
59
59
  log.debug(
lgdo/lh5/core.py CHANGED
@@ -113,7 +113,11 @@ def read(
113
113
  lh5_obj = lh5_file[name]
114
114
  elif isinstance(lh5_file, str):
115
115
  lh5_file = h5py.File(lh5_file, mode="r", locking=locking)
116
- lh5_obj = lh5_file[name]
116
+ try:
117
+ lh5_obj = lh5_file[name]
118
+ except KeyError as ke:
119
+ err = f"Object {name} not found in file {lh5_file.filename}"
120
+ raise KeyError(err) from ke
117
121
  else:
118
122
  if obj_buf is not None:
119
123
  obj_buf.resize(obj_buf_start)
lgdo/lh5/store.py CHANGED
@@ -6,11 +6,11 @@ HDF5 files.
6
6
  from __future__ import annotations
7
7
 
8
8
  import logging
9
- import os
10
9
  import sys
11
10
  from collections import OrderedDict
12
11
  from collections.abc import Mapping, Sequence
13
12
  from inspect import signature
13
+ from pathlib import Path
14
14
  from typing import Any
15
15
 
16
16
  import h5py
@@ -92,16 +92,16 @@ class LH5Store:
92
92
  return self.files[lh5_file]
93
93
 
94
94
  if self.base_path != "":
95
- full_path = os.path.join(self.base_path, lh5_file)
95
+ full_path = Path(self.base_path) / lh5_file
96
96
  else:
97
- full_path = lh5_file
97
+ full_path = Path(lh5_file)
98
98
 
99
- file_exists = os.path.exists(full_path)
99
+ file_exists = full_path.exists()
100
100
  if mode != "r":
101
- directory = os.path.dirname(full_path)
102
- if directory != "" and not os.path.exists(directory):
101
+ directory = full_path.parent
102
+ if directory != "" and not full_path.parent.exists():
103
103
  log.debug(f"making path {directory}")
104
- os.makedirs(directory)
104
+ directory.mkdir(parents=True, exist_ok=True)
105
105
 
106
106
  if mode == "r" and not file_exists:
107
107
  msg = f"file {full_path} not found"
lgdo/lh5/tools.py CHANGED
@@ -1,16 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import fnmatch
4
- import glob
5
4
  import logging
6
- import os
7
5
  from copy import copy
8
- from warnings import warn
9
6
 
10
7
  import h5py
11
- import numpy as np
12
- import pandas as pd
13
- from numpy.typing import NDArray
14
8
 
15
9
  from . import utils
16
10
  from .store import LH5Store
@@ -223,108 +217,3 @@ def show(
223
217
  break
224
218
 
225
219
  key = k_new
226
-
227
-
228
- def load_nda(
229
- f_list: str | list[str],
230
- par_list: list[str],
231
- lh5_group: str = "",
232
- idx_list: list[NDArray | list | tuple] | None = None,
233
- ) -> dict[str, NDArray]:
234
- r"""Build a dictionary of :class:`numpy.ndarray`\ s from LH5 data.
235
-
236
- Given a list of files, a list of LH5 table parameters, and an optional
237
- group path, return a NumPy array with all values for each parameter.
238
-
239
- Parameters
240
- ----------
241
- f_list
242
- A list of files. Can contain wildcards.
243
- par_list
244
- A list of parameters to read from each file.
245
- lh5_group
246
- group path within which to find the specified parameters.
247
- idx_list
248
- for fancy-indexed reads. Must be one index array for each file in
249
- `f_list`.
250
-
251
- Returns
252
- -------
253
- par_data
254
- A dictionary of the parameter data keyed by the elements of `par_list`.
255
- Each entry contains the data for the specified parameter concatenated
256
- over all files in `f_list`.
257
- """
258
- warn(
259
- "load_nda() is deprecated. "
260
- "Please replace it with LH5Store.read(...).view_as('np'), "
261
- "or just read_as(..., 'np'). "
262
- "load_nda() will be removed in a future release.",
263
- DeprecationWarning,
264
- stacklevel=2,
265
- )
266
-
267
- if isinstance(f_list, str):
268
- f_list = [f_list]
269
- if idx_list is not None:
270
- idx_list = [idx_list]
271
- if idx_list is not None and len(f_list) != len(idx_list):
272
- msg = f"f_list length ({len(f_list)}) != idx_list length ({len(idx_list)})!"
273
- raise ValueError(msg)
274
-
275
- # Expand wildcards
276
- f_list = [f for f_wc in f_list for f in sorted(glob.glob(os.path.expandvars(f_wc)))]
277
-
278
- sto = LH5Store()
279
- par_data = {par: [] for par in par_list}
280
- for ii, ff in enumerate(f_list):
281
- f = sto.gimme_file(ff, "r")
282
- for par in par_list:
283
- if f"{lh5_group}/{par}" not in f:
284
- msg = f"'{lh5_group}/{par}' not in file {ff}"
285
- raise RuntimeError(msg)
286
-
287
- if idx_list is None:
288
- data, _ = sto.read(f"{lh5_group}/{par}", f)
289
- else:
290
- data, _ = sto.read(f"{lh5_group}/{par}", f, idx=idx_list[ii])
291
- if not data:
292
- continue
293
- par_data[par].append(data.nda)
294
- return {par: np.concatenate(par_data[par]) for par in par_list}
295
-
296
-
297
- def load_dfs(
298
- f_list: str | list[str],
299
- par_list: list[str],
300
- lh5_group: str = "",
301
- idx_list: list[NDArray | list | tuple] | None = None,
302
- ) -> pd.DataFrame:
303
- """Build a :class:`pandas.DataFrame` from LH5 data.
304
-
305
- Given a list of files (can use wildcards), a list of LH5 columns, and
306
- optionally the group path, return a :class:`pandas.DataFrame` with all
307
- values for each parameter.
308
-
309
- See Also
310
- --------
311
- :func:`load_nda`
312
-
313
- Returns
314
- -------
315
- dataframe
316
- contains columns for each parameter in `par_list`, and rows containing
317
- all data for the associated parameters concatenated over all files in
318
- `f_list`.
319
- """
320
- warn(
321
- "load_dfs() is deprecated. "
322
- "Please replace it with LH5Store.read(...).view_as('pd'), "
323
- "or just read_as(..., 'pd'). "
324
- "load_dfs() will be removed in a future release.",
325
- DeprecationWarning,
326
- stacklevel=2,
327
- )
328
- return pd.DataFrame(
329
- load_nda(f_list, par_list, lh5_group=lh5_group, idx_list=idx_list)
330
- )
lgdo/lh5/utils.py CHANGED
@@ -7,6 +7,7 @@ import logging
7
7
  import os
8
8
  import string
9
9
  from collections.abc import Mapping, Sequence
10
+ from pathlib import Path
10
11
  from typing import Any
11
12
 
12
13
  import h5py
@@ -183,14 +184,15 @@ def expand_path(
183
184
  Unique absolute path, or list of all absolute paths
184
185
  """
185
186
  if base_path is not None and base_path != "":
186
- base_path = os.path.expanduser(os.path.expandvars(base_path))
187
- path = os.path.join(base_path, path)
187
+ base_path = Path(os.path.expandvars(base_path)).expanduser()
188
+ path = base_path / path
188
189
 
189
190
  # first expand variables
190
191
  _path = expand_vars(path, substitute)
191
192
 
192
193
  # then expand wildcards
193
- paths = sorted(glob.glob(os.path.expanduser(_path)))
194
+ # pathlib glob works differently so use glob for now
195
+ paths = sorted(glob.glob(str(Path(_path).expanduser()))) # noqa: PTH207
194
196
 
195
197
  if base_path is not None and base_path != "":
196
198
  paths = [os.path.relpath(p, base_path) for p in paths]
lgdo/types/array.py CHANGED
@@ -6,7 +6,7 @@ corresponding utilities.
6
6
  from __future__ import annotations
7
7
 
8
8
  import logging
9
- from collections.abc import Iterator
9
+ from collections.abc import Collection, Iterator
10
10
  from typing import Any
11
11
 
12
12
  import awkward as ak
@@ -126,19 +126,27 @@ class Array(LGDOCollection):
126
126
  "Set capacity to be minimum needed to support Array size"
127
127
  self.reserve_capacity(np.prod(self.shape))
128
128
 
129
- def resize(self, new_size: int, trim=False) -> None:
129
+ def resize(self, new_size: int | Collection[int], trim=False) -> None:
130
130
  """Set size of Array in rows. Only change capacity if it must be
131
131
  increased to accommodate new rows; in this case double capacity.
132
- If trim is True, capacity will be set to match size."""
132
+ If trim is True, capacity will be set to match size. If new_size
133
+ is an int, do not change size of inner dimensions.
133
134
 
134
- self._size = new_size
135
+ If new_size is a collection, internal memory will be re-allocated, so
136
+ this should be done only rarely!"""
135
137
 
136
- if trim and new_size != self.get_capacity:
137
- self.reserve_capacity(new_size)
138
+ if isinstance(new_size, Collection):
139
+ self._size = new_size[0]
140
+ self._nda.resize(new_size)
141
+ else:
142
+ self._size = new_size
143
+
144
+ if trim and new_size != self.get_capacity:
145
+ self.reserve_capacity(new_size)
138
146
 
139
- # If capacity is not big enough, set to next power of 2 big enough
140
- if new_size > self.get_capacity():
141
- self.reserve_capacity(int(2 ** (np.ceil(np.log2(new_size)))))
147
+ # If capacity is not big enough, set to next power of 2 big enough
148
+ if new_size > self.get_capacity():
149
+ self.reserve_capacity(int(2 ** (np.ceil(np.log2(new_size)))))
142
150
 
143
151
  def append(self, value: np.ndarray) -> None:
144
152
  "Append value to end of array (with copy)"
@@ -130,20 +130,48 @@ class VectorOfVectors(LGDOCollection):
130
130
 
131
131
  # ak.to_buffer helps in de-serialization
132
132
  # NOTE: ak.to_packed() needed?
133
- form, length, container = ak.to_buffers(ak.to_packed(data))
134
-
135
- # NOTE: node#-data is not even in the dict if the awkward array is empty
136
- # NOTE: if the data arg was a numpy array, to_buffers() preserves
137
- # the original dtype
138
- # FIXME: have to copy the buffers, otherwise self will not own the
139
- # data and self.resize() will fail. Is it possible to avoid this?
140
- flattened_data = np.copy(
141
- container.pop(f"node{data.ndim - 1}-data", np.empty(0, dtype=dtype))
142
- )
133
+ form, _, container = ak.to_buffers(ak.to_packed(data))
134
+
135
+ # check if bytestring
136
+ curr = form
137
+ for _ in range(data.ndim - 1):
138
+ curr = curr.content
139
+ if (
140
+ "__array__" in curr.parameters
141
+ and curr.parameters["__array__"] == "bytestring"
142
+ ):
143
+ diffs = np.diff(container[f"node{data.ndim - 1}-offsets"])
144
+ if (diffs != diffs[0]).all():
145
+ err_msg = "Non uniform string lengths not supported"
146
+ raise NotImplementedError(err_msg)
147
+ flattened_data = np.asarray(
148
+ ak.enforce_type(
149
+ ak.unflatten(
150
+ container.pop(
151
+ f"node{data.ndim}-data", np.empty(0, dtype=dtype)
152
+ ),
153
+ diffs[0],
154
+ ),
155
+ "bytes",
156
+ )
157
+ )
143
158
 
144
- # if user-provided dtype is different than dtype from Awkward, cast
145
- # NOTE: makes a copy only if needed
146
- flattened_data = np.asarray(flattened_data, dtype=dtype)
159
+ # if user-provided dtype is different than dtype from Awkward, cast
160
+ # NOTE: makes a copy only if needed
161
+ flattened_data = np.asarray(flattened_data, dtype=dtype)
162
+ else:
163
+ # NOTE: node#-data is not even in the dict if the awkward array is empty
164
+ # NOTE: if the data arg was a numpy array, to_buffers() preserves
165
+ # the original dtype
166
+ # FIXME: have to copy the buffers, otherwise self will not own the
167
+ # data and self.resize() will fail. Is it possible to avoid this?
168
+ flattened_data = np.copy(
169
+ container.pop(f"node{data.ndim - 1}-data", np.empty(0, dtype=dtype))
170
+ )
171
+
172
+ # if user-provided dtype is different than dtype from Awkward, cast
173
+ # NOTE: makes a copy only if needed
174
+ flattened_data = np.asarray(flattened_data, dtype=dtype)
147
175
 
148
176
  # start from innermost VoV and build nested structure
149
177
  for i in range(data.ndim - 2, -1, -1):
@@ -476,7 +504,10 @@ class VectorOfVectors(LGDOCollection):
476
504
  else:
477
505
  nan_val = np.nan
478
506
  vovutils._nb_fill(
479
- vec, lens, nan_val, self.flattened_data.nda[start : cum_lens[-1]]
507
+ vec,
508
+ lens,
509
+ np.array([nan_val]).astype(self.flattened_data.nda.dtype),
510
+ self.flattened_data.nda[start : cum_lens[-1]],
480
511
  )
481
512
 
482
513
  # add new vector(s) length to cumulative_length
@@ -627,11 +658,25 @@ class VectorOfVectors(LGDOCollection):
627
658
  offsets[1:] = self.cumulative_length.nda
628
659
  offsets[0] = 0
629
660
 
630
- content = (
631
- ak.contents.NumpyArray(self.flattened_data.nda)
632
- if self.ndim == 2
633
- else self.flattened_data.view_as(library, with_units=with_units).layout
634
- )
661
+ if self.ndim != 2:
662
+ content = self.flattened_data.view_as(
663
+ library, with_units=with_units
664
+ ).layout
665
+ # need to handle strings separately
666
+ elif np.issubdtype(self.flattened_data.nda.dtype, np.bytes_):
667
+ byte_arrays = []
668
+ for s in self.flattened_data.nda:
669
+ # Convert each string to array of bytes
670
+ byte_array = np.frombuffer(s, dtype=np.uint8)
671
+ byte_arrays.append(byte_array)
672
+ max_len = max(len(b) for b in byte_arrays)
673
+ raw_arrays = ak.contents.NumpyArray(np.concatenate(byte_arrays))
674
+ array_of_chars = ak.contents.RegularArray(
675
+ raw_arrays, max_len, parameters={"__array__": "bytes"}
676
+ )
677
+ content = ak.enforce_type(array_of_chars, "bytes", highlevel=False)
678
+ else:
679
+ content = ak.contents.NumpyArray(self.flattened_data.nda)
635
680
 
636
681
  layout = ak.contents.ListOffsetArray(
637
682
  offsets=ak.index.Index(offsets),
lgdo/types/vovutils.py CHANGED
@@ -131,7 +131,7 @@ def _nb_fill(
131
131
  for i, ll in enumerate(len_in):
132
132
  stop = start + ll
133
133
  if ll > max_len:
134
- flattened_array_out[start : start + max_len] = aoa_in[i, :]
134
+ flattened_array_out[start : start + max_len] = aoa_in[i, :max_len]
135
135
  flattened_array_out[start + max_len : stop] = nan_val
136
136
  else:
137
137
  flattened_array_out[start:stop] = aoa_in[i, :ll]
@@ -112,12 +112,10 @@ class WaveformTable(Table):
112
112
  if not isinstance(t0, Array):
113
113
  shape = (size,)
114
114
  t0_dtype = t0.dtype if hasattr(t0, "dtype") else np.float32
115
- nda = (
116
- t0 if isinstance(t0, np.ndarray) else np.full(shape, t0, dtype=t0_dtype)
117
- )
118
- if nda.shape != shape:
119
- nda.resize(shape, refcheck=True)
120
- t0 = Array(nda=nda)
115
+ if isinstance(t0, np.ndarray):
116
+ t0 = Array(nda=t0, shape=shape, dtype=t0_dtype)
117
+ else:
118
+ t0 = Array(fill_val=t0, shape=shape, dtype=t0_dtype)
121
119
 
122
120
  if t0_units is not None:
123
121
  t0.attrs["units"] = f"{t0_units}"
@@ -125,12 +123,11 @@ class WaveformTable(Table):
125
123
  if not isinstance(dt, Array):
126
124
  shape = (size,)
127
125
  dt_dtype = dt.dtype if hasattr(dt, "dtype") else np.float32
128
- nda = (
129
- dt if isinstance(dt, np.ndarray) else np.full(shape, dt, dtype=dt_dtype)
130
- )
131
- if nda.shape != shape:
132
- nda.resize(shape, refcheck=True)
133
- dt = Array(nda=nda)
126
+ if isinstance(dt, np.ndarray):
127
+ dt = Array(nda=dt, shape=shape, dtype=dt_dtype)
128
+ else:
129
+ dt = Array(fill_val=dt, shape=shape, dtype=dt_dtype)
130
+
134
131
  if dt_units is not None:
135
132
  dt.attrs["units"] = f"{dt_units}"
136
133
 
@@ -174,14 +171,15 @@ class WaveformTable(Table):
174
171
  if hasattr(values, "dtype")
175
172
  else np.dtype(np.float64)
176
173
  )
177
- nda = (
178
- values
179
- if isinstance(values, np.ndarray)
180
- else np.zeros(shape, dtype=dtype)
181
- )
182
- if nda.shape != shape:
183
- nda.resize(shape, refcheck=True)
184
- values = ArrayOfEqualSizedArrays(dims=(1, 1), nda=nda)
174
+ if isinstance(values, np.ndarray):
175
+ values = ArrayOfEqualSizedArrays(
176
+ dims=(1, 1), nda=values, shape=shape, dtype=dtype
177
+ )
178
+ else:
179
+ values = ArrayOfEqualSizedArrays(
180
+ dims=(1, 1), fill_val=0, shape=shape, dtype=dtype
181
+ )
182
+
185
183
  if values_units is not None:
186
184
  values.attrs["units"] = f"{values_units}"
187
185
 
@@ -215,7 +213,7 @@ class WaveformTable(Table):
215
213
  return
216
214
  shape = self.values.nda.shape
217
215
  shape = (shape[0], wf_len)
218
- self.values.nda.resize(shape, refcheck=True)
216
+ self.values.resize(shape)
219
217
 
220
218
  def resize_wf_len(self, new_len: int) -> None:
221
219
  """Alias for `wf_len.setter`, for when we want to make it clear in
lgdo/lh5_store.py DELETED
@@ -1,284 +0,0 @@
1
- """
2
- .. warning::
3
- This subpackage is deprecated, use :mod:`lgdo.lh5`.
4
- """
5
-
6
- from __future__ import annotations
7
-
8
- import sys
9
- from collections.abc import Iterator
10
- from typing import Union
11
- from warnings import warn
12
-
13
- import h5py
14
- import numpy as np
15
- import pandas as pd
16
-
17
- from . import lh5
18
- from .types import (
19
- Array,
20
- ArrayOfEncodedEqualSizedArrays, # noqa: F401
21
- ArrayOfEqualSizedArrays, # noqa: F401
22
- FixedSizeArray, # noqa: F401
23
- Histogram, # noqa: F401
24
- Scalar,
25
- Struct,
26
- Table, # noqa: F401
27
- VectorOfEncodedVectors, # noqa: F401
28
- VectorOfVectors,
29
- WaveformTable, # noqa: F401
30
- )
31
-
32
- LGDO = Union[Array, Scalar, Struct, VectorOfVectors]
33
-
34
-
35
- class LH5Iterator(lh5.LH5Iterator):
36
- """
37
- .. warning::
38
- This class is deprecated, use :class:`lgdo.lh5.iterator.LH5Iterator`.
39
-
40
- """
41
-
42
- def __init__(
43
- self,
44
- lh5_files: str | list[str],
45
- groups: str | list[str],
46
- base_path: str = "",
47
- entry_list: list[int] | list[list[int]] | None = None,
48
- entry_mask: list[bool] | list[list[bool]] | None = None,
49
- field_mask: dict[str, bool] | list[str] | tuple[str] | None = None,
50
- buffer_len: int = 3200,
51
- friend: Iterator | None = None,
52
- ) -> None:
53
- warn(
54
- "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Iterator."
55
- "Please replace 'from lgdo.lh5_store import LH5Iterator' with 'from lgdo.lh5 import LH5Iterator'."
56
- "lgdo.lh5_store will be removed in a future release.",
57
- DeprecationWarning,
58
- stacklevel=2,
59
- )
60
- super().__init__(
61
- lh5_files,
62
- groups,
63
- base_path,
64
- entry_list,
65
- entry_mask,
66
- field_mask,
67
- buffer_len,
68
- friend,
69
- )
70
-
71
- def write_object(
72
- self,
73
- obj: LGDO,
74
- name: str,
75
- lh5_file: str | h5py.File,
76
- group: str | h5py.Group = "/",
77
- start_row: int = 0,
78
- n_rows: int | None = None,
79
- wo_mode: str = "append",
80
- write_start: int = 0,
81
- **h5py_kwargs,
82
- ) -> None:
83
- """
84
- .. warning::
85
- This method is deprecated, use :meth:`lgdo.lh5.iterator.LH5Iterator.write`.
86
-
87
- """
88
- warn(
89
- "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Iterator. "
90
- "The object you are calling this function from uses the old LH5Iterator class."
91
- "Please replace 'from lgdo.lh5_store import LH5Iterator' with 'from lgdo.lh5 import LH5Iterator'."
92
- "lgdo.lh5_store will be removed in a future release.",
93
- DeprecationWarning,
94
- stacklevel=2,
95
- )
96
- self.write(
97
- obj,
98
- name,
99
- lh5_file,
100
- group,
101
- start_row,
102
- n_rows,
103
- wo_mode,
104
- write_start,
105
- h5py_kwargs,
106
- )
107
-
108
- def read_object(
109
- self,
110
- name: str,
111
- lh5_file: str | h5py.File | list[str | h5py.File],
112
- start_row: int = 0,
113
- n_rows: int = sys.maxsize,
114
- idx: np.ndarray | list | tuple | list[np.ndarray | list | tuple] = None,
115
- field_mask: dict[str, bool] | list[str] | tuple[str] | None = None,
116
- obj_buf: LGDO = None,
117
- obj_buf_start: int = 0,
118
- decompress: bool = True,
119
- ) -> tuple[LGDO, int]:
120
- """
121
- .. warning::
122
- This method is deprecated, use :meth:`lgdo.lh5.iterator.LH5Iterator.read`.
123
-
124
- """
125
- warn(
126
- "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Iterator. "
127
- "The object you are calling this function from uses the old LH5Iterator class."
128
- "Please replace 'from lgdo.lh5_store import LH5Iterator' with 'from lgdo.lh5 import LH5Iterator'."
129
- "lgdo.lh5_store will be removed in a future release.",
130
- DeprecationWarning,
131
- stacklevel=2,
132
- )
133
- return self.read(
134
- name,
135
- lh5_file,
136
- start_row,
137
- n_rows,
138
- idx,
139
- field_mask,
140
- obj_buf,
141
- obj_buf_start,
142
- decompress,
143
- )
144
-
145
-
146
- class LH5Store(lh5.LH5Store):
147
- """
148
- .. warning::
149
- This class is deprecated, use :class:`lgdo.lh5.iterator.LH5Store`.
150
-
151
- """
152
-
153
- def __init__(self, base_path: str = "", keep_open: bool = False):
154
- warn(
155
- "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store. "
156
- "Please replace 'from lgdo.lh5_store import LH5Store' with 'from lgdo.lh5 import LH5Store'."
157
- "lgdo.lh5_store will be removed in a future release.",
158
- DeprecationWarning,
159
- stacklevel=2,
160
- )
161
- super().__init__(base_path, keep_open)
162
-
163
- def read_object(
164
- self,
165
- name: str,
166
- lh5_file: str | h5py.File | list[str | h5py.File],
167
- **kwargs,
168
- ) -> tuple[LGDO, int]:
169
- """
170
- .. warning::
171
- This method is deprecated, use :meth:`lgdo.lh5.store.LH5Store.read`.
172
-
173
- """
174
- warn(
175
- "LH5Store.read_object() has been renamed to LH5Store.read(), "
176
- "Please update your code."
177
- "LH5Store.read_object() will be removed in a future release.",
178
- DeprecationWarning,
179
- stacklevel=2,
180
- )
181
- return super().read(self, name, lh5_file, **kwargs)
182
-
183
- def write_object(
184
- self,
185
- obj: LGDO,
186
- name: str,
187
- lh5_file: str | h5py.File,
188
- **kwargs,
189
- ) -> tuple[LGDO, int]:
190
- """
191
- .. warning::
192
- This method is deprecated, use :meth:`lgdo.lh5.store.LH5Store.write`.
193
-
194
- """
195
- warn(
196
- "LH5Store.write_object() has been renamed to LH5Store.write(), "
197
- "Please update your code."
198
- "LH5Store.write_object() will be removed in a future release.",
199
- DeprecationWarning,
200
- stacklevel=2,
201
- )
202
- return super().read(self, obj, name, lh5_file, **kwargs)
203
-
204
-
205
- def load_dfs(
206
- f_list: str | list[str],
207
- par_list: list[str],
208
- lh5_group: str = "",
209
- idx_list: list[np.ndarray | list | tuple] | None = None,
210
- ) -> pd.DataFrame:
211
- """
212
- .. warning::
213
- This function is deprecated, use :meth:`lgdo.types.lgdo.LGDO.view_as` to
214
- view LGDO data as a Pandas data structure.
215
-
216
- """
217
- warn(
218
- "lgdo.lh5_store has moved to a subfolder lgdo.lh5. "
219
- "Please replace 'from lgdo.lh5_store import load_dfs' with 'from lgdo.lh5 import load_dfs'. "
220
- "lgdo.lh5_store will be removed in a future release.",
221
- DeprecationWarning,
222
- stacklevel=2,
223
- )
224
- return lh5.load_dfs(f_list, par_list, lh5_group, idx_list)
225
-
226
-
227
- def load_nda(
228
- f_list: str | list[str],
229
- par_list: list[str],
230
- lh5_group: str = "",
231
- idx_list: list[np.ndarray | list | tuple] | None = None,
232
- ) -> dict[str, np.ndarray]:
233
- """
234
- .. warning::
235
- This function is deprecated, use :meth:`lgdo.types.lgdo.LGDO.view_as` to
236
- view LGDO data as a NumPy data structure.
237
-
238
- """
239
- warn(
240
- "lgdo.lh5_store has moved to a subfolder lgdo.lh5. "
241
- "Please replace 'from lgdo.lh5_store import load_nda' with 'from lgdo.lh5 import load_nda'. "
242
- "lgdo.lh5_store will be removed in a future release.",
243
- DeprecationWarning,
244
- stacklevel=2,
245
- )
246
- return lh5.load_nda(f_list, par_list, lh5_group, idx_list)
247
-
248
-
249
- def ls(lh5_file: str | h5py.Group, lh5_group: str = "") -> list[str]:
250
- """
251
- .. warning::
252
- This function is deprecated, import :func:`lgdo.lh5.tools.ls`.
253
-
254
- """
255
- warn(
256
- "lgdo.lh5_store has moved to a subfolder lgdo.lh5. "
257
- "Please replace 'from lgdo.lh5_store import ls' with 'from lgdo.lh5 import ls'. "
258
- "lgdo.lh5_store will be removed in a future release.",
259
- DeprecationWarning,
260
- stacklevel=2,
261
- )
262
- return lh5.ls(lh5_file, lh5_group)
263
-
264
-
265
- def show(
266
- lh5_file: str | h5py.Group,
267
- lh5_group: str = "/",
268
- attrs: bool = False,
269
- indent: str = "",
270
- header: bool = True,
271
- ) -> None:
272
- """
273
- .. warning::
274
- This function is deprecated, import :func:`lgdo.lh5.tools.show`.
275
-
276
- """
277
- warn(
278
- "lgdo.lh5_store has moved to a subfolder lgdo.lh5. "
279
- "Please replace 'from lgdo.lh5_store import show' with 'from lgdo.lh5 import show'. "
280
- "lgdo.lh5_store will be removed in a future release.",
281
- DeprecationWarning,
282
- stacklevel=2,
283
- )
284
- lh5.show(lh5_file, lh5_group, attrs, indent, header)