legend-pydataobj 1.12.0a2__py3-none-any.whl → 1.12.0a4__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
legend_pydataobj-1.12.0a4.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: legend_pydataobj
-Version: 1.12.0a2
+Version: 1.12.0a4
 Summary: LEGEND Python Data Objects
 Author: The LEGEND Collaboration
 Maintainer: The LEGEND Collaboration
legend_pydataobj-1.12.0a4.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
-legend_pydataobj-1.12.0a2.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+legend_pydataobj-1.12.0a4.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
 lgdo/__init__.py,sha256=fkRv79kdtBasw31gPVK9SdLQ2vEEajTV2t3UPDvFg9o,3206
-lgdo/_version.py,sha256=JnfDM_d42edyo74E887XCyb9fjk-f5Vnz-5nNPPYBpo,515
+lgdo/_version.py,sha256=lVyzAOse2pIwNX9sD_s_ucUhU5oPCN_lFpntrC7eKG8,521
 lgdo/cli.py,sha256=s_EWTBWW76l7zWb6gaTSTjiT-0RzzcYEmjeFEQCVxfk,4647
 lgdo/lgdo_utils.py,sha256=6a2YWEwpyEMXlAyTHZMO01aqxy6SxJzPZkGNWKNWuS0,2567
 lgdo/logging.py,sha256=82wIOj7l7xr3WYyeHdpSXbbjzHJsy-uRyKYUYx2vMfQ,1003
@@ -14,13 +14,13 @@ lgdo/compression/utils.py,sha256=W2RkBrxPpXlat84dnU9Ad7d_tTws0irtGl7O1dNWjnk,114
 lgdo/compression/varlen.py,sha256=6ZZUItyoOfygDdE0DyoISeFZfqdbH6xl7T0eclfarzg,15127
 lgdo/lh5/__init__.py,sha256=UTzKGmpgFoHwVB_yNULvJsHD_uQQGl-R87l-3QBkh7w,773
 lgdo/lh5/concat.py,sha256=BZCgK7TWPKK8fMmha8K83d3bC31FVO1b5LOW7x-Ru1s,6186
-lgdo/lh5/core.py,sha256=GjosZGUp4GSO5FtWV9eXUt_6DGU_OwJXODlj5K1j93M,13320
+lgdo/lh5/core.py,sha256=nULH5UoRjUCH0E3Z0-OH_DbFz2PRAQP73Qaf1kfnyPE,13481
 lgdo/lh5/datatype.py,sha256=O_7BqOlX8PFMyG0ppkfUT5aps5HEqX0bpuKcJO3jhu0,1691
 lgdo/lh5/exceptions.py,sha256=3kj8avXl4eBGvebl3LG12gJEmw91W0T8PYR0AfvUAyM,1211
 lgdo/lh5/iterator.py,sha256=1ob9B7Bf3ioGCtZkUZoL6ibTxAwLf4ld8_33ghVVEa4,20498
-lgdo/lh5/store.py,sha256=MYbMt-Mc7izELxuyLlSrrYrylCIzxc2CLzZYIVbZ33w,8455
+lgdo/lh5/store.py,sha256=qkBm3gPbr1R2UlQpUuDR5sGRMzpYJBWFL8fDIry6tmQ,8474
 lgdo/lh5/tools.py,sha256=drtJWHY82wCFuFr6LVVnm2AQgs_wZuFmAvyOB4tcOHs,6431
-lgdo/lh5/utils.py,sha256=ioz8DlyXZsejwnU2qYdIccdHcF12H62jgLkZsiDOLSM,6243
+lgdo/lh5/utils.py,sha256=f2H7H1D-RfDN3g_YrVDQEPaHevn5yDJFA-uznK9cgx8,6336
 lgdo/lh5/_serializers/__init__.py,sha256=eZzxMp1SeZWG0PkEXUiCz3XyprQ8EmelHUmJogC8xYE,1263
 lgdo/lh5/_serializers/read/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lgdo/lh5/_serializers/read/array.py,sha256=uWfMCihfAmW2DE2ewip2qCK_kvQC_mb2zvOv26uzijc,1000
@@ -32,7 +32,7 @@ lgdo/lh5/_serializers/read/utils.py,sha256=YfSqPO-83A1XvhhuULxQ0Qz2A5ODa3sb7ApNx
 lgdo/lh5/_serializers/read/vector_of_vectors.py,sha256=765P8mElGArAaEPkHTAUXFQ47t1_3-3BQAete0LckBQ,7207
 lgdo/lh5/_serializers/write/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 lgdo/lh5/_serializers/write/array.py,sha256=66DKnW2yqIBlUGNBPWcE-m4W0B2-nTKusDHGX9m6GY0,3223
-lgdo/lh5/_serializers/write/composite.py,sha256=I6lH0nWFIpAfZyG4-0rLxzg3mfazZ_FEhQVp1FZ0aA4,9254
+lgdo/lh5/_serializers/write/composite.py,sha256=JYoLT9intT_Y4xPeL_l7CSd22O0ZKyEmd0flKkWWPFA,9268
 lgdo/lh5/_serializers/write/scalar.py,sha256=JPt_fcdTKOSFp5hfJdcKIfK4hxhcD8vhOlvDF-7btQ8,763
 lgdo/lh5/_serializers/write/vector_of_vectors.py,sha256=puGQX9XF5P_5DVbm_Cc6TvPrsDywgBLSYtkqFNltbB4,3493
 lgdo/types/__init__.py,sha256=DNfOErPiAZg-7Gygkp6ZKAi20Yrm1mfderZHvKo1Y4s,821
@@ -45,11 +45,11 @@ lgdo/types/lgdo.py,sha256=21YNtJCHnSO3M60rjsAdbMO5crDjL_0BtuFpudZ2xvU,4500
 lgdo/types/scalar.py,sha256=c5Es2vyDqyWTPV6mujzfIzMpC1jNWkEIcvYyWQUxH3Q,1933
 lgdo/types/struct.py,sha256=Q0OWLVd4B0ciLb8t6VsxU3MPbmGLZ7WfQNno1lSQS0Q,4918
 lgdo/types/table.py,sha256=FkWesoEA9bmGGSW8Ewig1Zs77ffUoR_nggfYSmkWpjU,20079
-lgdo/types/vectorofvectors.py,sha256=-5m3g5w03nqs__Uv4cO36A_7_h_4mJhFpIhzJh3Y5D0,24855
+lgdo/types/vectorofvectors.py,sha256=GbAKV_ehXN4XdWSwnmKS_ErCiudRetcH_3wo7iDrVjw,26854
 lgdo/types/vovutils.py,sha256=LW3ZcwECxVYxxcFadAtY3nnK-9-rk8Xbg_m8hY30lo4,10708
 lgdo/types/waveformtable.py,sha256=9S_NMg894NZTGt2pLuskwH4-zQ5EbLnzWI6FVui6fXE,9827
-legend_pydataobj-1.12.0a2.dist-info/METADATA,sha256=71-hhjEgQZ9NqNS7FQkFYPALw6VZJf6vDMtCGMrdohE,44445
-legend_pydataobj-1.12.0a2.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-legend_pydataobj-1.12.0a2.dist-info/entry_points.txt,sha256=0KWfnwbuwhNn0vPUqARukjp04Ca6lzfZBSirouRmk7I,76
-legend_pydataobj-1.12.0a2.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
-legend_pydataobj-1.12.0a2.dist-info/RECORD,,
+legend_pydataobj-1.12.0a4.dist-info/METADATA,sha256=eD1QW8NEKGSWEqxSes1-TFnq1VHoxtdLmLfafsB53nI,44445
+legend_pydataobj-1.12.0a4.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
+legend_pydataobj-1.12.0a4.dist-info/entry_points.txt,sha256=0KWfnwbuwhNn0vPUqARukjp04Ca6lzfZBSirouRmk7I,76
+legend_pydataobj-1.12.0a4.dist-info/top_level.txt,sha256=KyR-EUloqiXcQ62IWnzBmtInDtvsHl4q2ZJAZgTcLXE,5
+legend_pydataobj-1.12.0a4.dist-info/RECORD,,
legend_pydataobj-1.12.0a4.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (78.1.0)
+Generator: setuptools (80.0.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 
lgdo/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '1.12.0a2'
-__version_tuple__ = version_tuple = (1, 12, 0)
+__version__ = version = '1.12.0a4'
+__version_tuple__ = version_tuple = (1, 12, 0, 'a4')
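
The regenerated _version.py now carries the pre-release segment in version_tuple. A minimal sketch of what that means for downstream version checks, assuming only the module contents shown above:

    # minimal sketch: the pre-release segment now appears in version_tuple
    from lgdo import _version

    assert _version.version == "1.12.0a4"
    assert _version.version_tuple == (1, 12, 0, "a4")

    # code that compared purely numeric tuples should drop the string part
    numeric = tuple(p for p in _version.version_tuple if isinstance(p, int))
    assert numeric >= (1, 12, 0)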
lgdo/lh5/_serializers/write/composite.py CHANGED
@@ -1,8 +1,8 @@
 from __future__ import annotations
 
 import logging
-import os
 from inspect import signature
+from pathlib import Path
 
 import h5py
 
@@ -53,7 +53,7 @@ def _h5_write_lgdo(
     # change any object in the file. So we use file:append for
     # write_object:overwrite.
     if not isinstance(lh5_file, h5py.File):
-        mode = "w" if wo_mode == "of" or not os.path.exists(lh5_file) else "a"
+        mode = "w" if wo_mode == "of" or not Path(lh5_file).exists() else "a"
         lh5_file = h5py.File(lh5_file, mode=mode, **file_kwargs)
 
     log.debug(
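
The os.path.exists to Path.exists swap is behavior-preserving for the write-mode selection. A standalone sketch (the helper name pick_mode is mine, for illustration only):

    import os
    from pathlib import Path

    def pick_mode(wo_mode: str, lh5_file: str) -> str:
        # "of" (overwrite file) always truncates; otherwise append only
        # if the file already exists
        return "w" if wo_mode == "of" or not Path(lh5_file).exists() else "a"

    # Path(...).exists() and os.path.exists(...) agree for ordinary paths
    assert Path("no_such.lh5").exists() == os.path.exists("no_such.lh5")
    assert pick_mode("of", "no_such.lh5") == "w"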
lgdo/lh5/core.py CHANGED
@@ -113,7 +113,11 @@ def read(
         lh5_obj = lh5_file[name]
     elif isinstance(lh5_file, str):
         lh5_file = h5py.File(lh5_file, mode="r", locking=locking)
-        lh5_obj = lh5_file[name]
+        try:
+            lh5_obj = lh5_file[name]
+        except KeyError as ke:
+            err = f"Object {name} not found in file {lh5_file.filename}"
+            raise KeyError(err) from ke
     else:
         if obj_buf is not None:
             obj_buf.resize(obj_buf_start)
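
A lookup of a missing object now raises a KeyError naming both the object and the file, instead of h5py's generic message. A hypothetical call site (the full lgdo.lh5.read signature is not shown in this diff):

    import lgdo.lh5 as lh5

    try:
        obj = lh5.read("stp/does_not_exist", "data.lh5")
    except KeyError as exc:
        # e.g. "Object stp/does_not_exist not found in file data.lh5"
        print(exc.args[0])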
lgdo/lh5/store.py CHANGED
@@ -6,11 +6,11 @@ HDF5 files.
 from __future__ import annotations
 
 import logging
-import os
 import sys
 from collections import OrderedDict
 from collections.abc import Mapping, Sequence
 from inspect import signature
+from pathlib import Path
 from typing import Any
 
 import h5py
@@ -92,16 +92,16 @@ class LH5Store:
             return self.files[lh5_file]
 
         if self.base_path != "":
-            full_path = os.path.join(self.base_path, lh5_file)
+            full_path = Path(self.base_path) / lh5_file
         else:
-            full_path = lh5_file
+            full_path = Path(lh5_file)
 
-        file_exists = os.path.exists(full_path)
+        file_exists = full_path.exists()
         if mode != "r":
-            directory = os.path.dirname(full_path)
-            if directory != "" and not os.path.exists(directory):
+            directory = full_path.parent
+            if directory != "" and not full_path.parent.exists():
                 log.debug(f"making path {directory}")
-                os.makedirs(directory)
+                directory.mkdir(parents=True, exist_ok=True)
 
         if mode == "r" and not file_exists:
             msg = f"file {full_path} not found"
lgdo/lh5/utils.py CHANGED
@@ -7,6 +7,7 @@ import logging
 import os
 import string
 from collections.abc import Mapping, Sequence
+from pathlib import Path
 from typing import Any
 
 import h5py
@@ -153,7 +154,7 @@ def expand_vars(expr: str, substitute: dict[str, str] | None = None) -> str:
 
     # use provided mapping
     # then expand env variables
-    return os.path.expandvars(string.Template(expr).safe_substitute(substitute))
+    return os.path.expandvars(string.Template(str(expr)).safe_substitute(substitute))
 
 
 def expand_path(
@@ -183,14 +184,15 @@ def expand_path(
         Unique absolute path, or list of all absolute paths
     """
     if base_path is not None and base_path != "":
-        base_path = os.path.expanduser(os.path.expandvars(base_path))
-        path = os.path.join(base_path, path)
+        base_path = Path(os.path.expandvars(base_path)).expanduser()
+        path = base_path / path
 
     # first expand variables
     _path = expand_vars(path, substitute)
 
     # then expand wildcards
-    paths = sorted(glob.glob(os.path.expanduser(_path)))
+    # pathlib glob works differently so use glob for now
+    paths = sorted(glob.glob(str(Path(_path).expanduser())))  # noqa: PTH207
 
     if base_path is not None and base_path != "":
         paths = [os.path.relpath(p, base_path) for p in paths]
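
The str(expr) coercion matters because expand_path can now pass a pathlib.Path into expand_vars, and string.Template substitutes only into strings. A self-contained sketch of the patched behavior:

    import os
    import string
    from pathlib import Path

    def expand_vars(expr, substitute=None):
        # mirrors the patched function: coerce a Path (or anything) to str
        # before template substitution and environment expansion
        return os.path.expandvars(
            string.Template(str(expr)).safe_substitute(substitute or {})
        )

    os.environ["DATADIR"] = "/data"
    print(expand_vars(Path("$DATADIR/$run/file.lh5"), {"run": "run1"}))
    # -> /data/run1/file.lh5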
lgdo/types/vectorofvectors.py CHANGED
@@ -130,20 +130,48 @@ class VectorOfVectors(LGDOCollection):
 
         # ak.to_buffer helps in de-serialization
         # NOTE: ak.to_packed() needed?
-        form, length, container = ak.to_buffers(ak.to_packed(data))
-
-        # NOTE: node#-data is not even in the dict if the awkward array is empty
-        # NOTE: if the data arg was a numpy array, to_buffers() preserves
-        # the original dtype
-        # FIXME: have to copy the buffers, otherwise self will not own the
-        # data and self.resize() will fail. Is it possible to avoid this?
-        flattened_data = np.copy(
-            container.pop(f"node{data.ndim - 1}-data", np.empty(0, dtype=dtype))
-        )
+        form, _, container = ak.to_buffers(ak.to_packed(data))
+
+        # check if bytestring
+        curr = form
+        for _ in range(data.ndim - 1):
+            curr = curr.content
+        if (
+            "__array__" in curr.parameters
+            and curr.parameters["__array__"] == "bytestring"
+        ):
+            diffs = np.diff(container[f"node{data.ndim - 1}-offsets"])
+            if (diffs != diffs[0]).all():
+                err_msg = "Non uniform string lengths not supported"
+                raise NotImplementedError(err_msg)
+            flattened_data = np.asarray(
+                ak.enforce_type(
+                    ak.unflatten(
+                        container.pop(
+                            f"node{data.ndim}-data", np.empty(0, dtype=dtype)
+                        ),
+                        diffs[0],
+                    ),
+                    "bytes",
+                )
+            )
 
-        # if user-provided dtype is different than dtype from Awkward, cast
-        # NOTE: makes a copy only if needed
-        flattened_data = np.asarray(flattened_data, dtype=dtype)
+            # if user-provided dtype is different than dtype from Awkward, cast
+            # NOTE: makes a copy only if needed
+            flattened_data = np.asarray(flattened_data, dtype=dtype)
+        else:
+            # NOTE: node#-data is not even in the dict if the awkward array is empty
+            # NOTE: if the data arg was a numpy array, to_buffers() preserves
+            # the original dtype
+            # FIXME: have to copy the buffers, otherwise self will not own the
+            # data and self.resize() will fail. Is it possible to avoid this?
+            flattened_data = np.copy(
+                container.pop(f"node{data.ndim - 1}-data", np.empty(0, dtype=dtype))
+            )
+
+            # if user-provided dtype is different than dtype from Awkward, cast
+            # NOTE: makes a copy only if needed
+            flattened_data = np.asarray(flattened_data, dtype=dtype)
 
         # start from innermost VoV and build nested structure
         for i in range(data.ndim - 2, -1, -1):
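
For orientation, a standalone sketch (plain Awkward and NumPy, nothing from lgdo) of the buffer layout the new branch inspects: ak.to_buffers tags bytestring forms with a "bytestring" parameter, stores the payload one node deeper than a numeric leaf, and exposes per-string lengths through the innermost offsets. Note that, as written, the (diffs != diffs[0]).all() guard rejects lengths only when every one differs from the first.

    import awkward as ak
    import numpy as np

    data = ak.Array([[b"ab", b"cd"], [b"ef"]])  # strings are leaves: ndim == 2
    form, _, container = ak.to_buffers(ak.to_packed(data))

    # walk to the innermost form node, as the new __init__ branch does
    curr = form
    for _ in range(data.ndim - 1):
        curr = curr.content
    print(curr.parameters["__array__"])  # "bytestring"

    # string lengths come from the innermost offsets: [2 2 2] here, uniform
    print(np.diff(container[f"node{data.ndim - 1}-offsets"]))

    # the raw bytes live one node deeper, at node2-data
    print(container[f"node{data.ndim}-data"].tobytes())  # b"abcdef"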
@@ -630,11 +658,25 @@ class VectorOfVectors(LGDOCollection):
         offsets[1:] = self.cumulative_length.nda
         offsets[0] = 0
 
-        content = (
-            ak.contents.NumpyArray(self.flattened_data.nda)
-            if self.ndim == 2
-            else self.flattened_data.view_as(library, with_units=with_units).layout
-        )
+        if self.ndim != 2:
+            content = self.flattened_data.view_as(
+                library, with_units=with_units
+            ).layout
+        # need to handle strings separately
+        elif np.issubdtype(self.flattened_data.nda.dtype, np.bytes_):
+            byte_arrays = []
+            for s in self.flattened_data.nda:
+                # Convert each string to array of bytes
+                byte_array = np.frombuffer(s, dtype=np.uint8)
+                byte_arrays.append(byte_array)
+            max_len = max(len(b) for b in byte_arrays)
+            raw_arrays = ak.contents.NumpyArray(np.concatenate(byte_arrays))
+            array_of_chars = ak.contents.RegularArray(
+                raw_arrays, max_len, parameters={"__array__": "bytes"}
+            )
+            content = ak.enforce_type(array_of_chars, "bytes", highlevel=False)
+        else:
+            content = ak.contents.NumpyArray(self.flattened_data.nda)
 
         layout = ak.contents.ListOffsetArray(
             offsets=ak.index.Index(offsets),
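
Together with the constructor change above, this lets fixed-width bytestring data round-trip through Awkward. A hypothetical round trip (the VectorOfVectors constructor and view_as names appear in this diff; their exact signatures are assumed):

    import awkward as ak
    from lgdo.types import VectorOfVectors

    # fixed-width strings only: ragged lengths raise NotImplementedError
    vov = VectorOfVectors(ak.Array([[b"ab", b"cd"], [b"ef"]]))
    print(vov.view_as("ak").tolist())  # [[b'ab', b'cd'], [b'ef']]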